├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── commoncrawl.R ├── dependencies.R ├── package.R ├── sample.R └── sparkwarc.R ├── README.Rmd ├── README.md ├── inst ├── java │ ├── sparkwarc-1.5-2.10.jar │ ├── sparkwarc-1.6-2.10.jar │ ├── sparkwarc-2.0-2.11.jar │ ├── sparkwarc-2.1-2.11.jar │ ├── sparkwarc-2.2-2.11.jar │ ├── sparkwarc-2.3-2.11.jar │ ├── sparkwarc-2.4-2.11.jar │ ├── sparkwarc-2.4-2.12.jar │ └── sparkwarc-3.0-2.12.jar └── samples │ ├── sample.warc │ ├── sample.warc.gz │ ├── sample.warc.paths │ ├── sample.wat │ ├── sample.wat.gz │ ├── sample.wet │ └── sample.wet.gz ├── java └── SparkWARC.scala ├── man ├── cc_warc.Rd ├── rcpp_read_warc_sample.Rd ├── spark_rcpp_read_warc.Rd ├── spark_read_warc.Rd ├── spark_read_warc_sample.Rd ├── spark_warc_sample_path.Rd └── sparkwarc.Rd ├── sparkwarc.Rproj └── src ├── Makevars ├── RcppExports.cpp └── warc.cpp /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | derby.log$ 4 | sparkwarc.Rcheck$ 5 | log4j.spark.log.*$ 6 | ^README\.Rmd$ 7 | ^README_files$ 8 | ^README_images$ 9 | ^NEWS\.md$ 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | # Session Data files 5 | .RData 6 | # User Data files 7 | .Ruserdata 8 | # Example code in package build process 9 | *-Ex.R 10 | # RStudio files 11 | .Rproj.user 12 | # produced vignettes 13 | vignettes/*.html 14 | vignettes/*.pdf 15 | # internal files (e.g. 
scratch files) 16 | internal 17 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 18 | .httr-oauth 19 | # Spark binaries temp directory 20 | spark 21 | # Hive metadata 22 | metastore_db/ 23 | derby.log 24 | # README.md cache 25 | README_cache 26 | # Perf.md cache 27 | perf_cache 28 | spark-warehouse 29 | # Temp parquet files 30 | docs/*.parquet 31 | .DS_Store 32 | demo.R 33 | # Spark log 34 | log4j.spark* 35 | /configure 36 | /bin/ 37 | .cache-main 38 | .settings 39 | .classpath 40 | # Rcpp 41 | /src/*.o 42 | /src/*.o-* 43 | /src/*.d 44 | /src/*.so 45 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: sparkwarc 2 | Type: Package 3 | Title: Load WARC Files into Apache Spark 4 | Version: 0.1.6 5 | Authors@R: c(person("Javier", "Luraschi", email = "jluraschi@gmail.com", role = c("aut")), 6 | person("Yitao", "Li", role = "aut", email = "yitaoli1990@gmail.com", 7 | comment = c(ORCID = "0000-0002-1261-905X")), 8 | person("Edgar", "Ruiz", email = "edgar@rstudio.com", role = c("aut", "cre")) 9 | ) 10 | Maintainer: Edgar Ruiz 11 | Description: Load WARC (Web ARChive) files into Apache Spark using 'sparklyr'. This 12 | allows to read files from the Common Crawl project . 
13 | License: Apache License 2.0 14 | BugReports: https://github.com/r-spark/sparkwarc 15 | Encoding: UTF-8 16 | LazyData: true 17 | Imports: 18 | DBI, 19 | sparklyr, 20 | Rcpp 21 | RoxygenNote: 7.1.1 22 | LinkingTo: 23 | Rcpp, 24 | SystemRequirements: C++11 25 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(cc_warc) 4 | export(rcpp_read_warc_sample) 5 | export(spark_rcpp_read_warc) 6 | export(spark_read_warc) 7 | export(spark_read_warc_sample) 8 | export(spark_warc_sample_path) 9 | import(DBI) 10 | import(Rcpp) 11 | import(sparklyr) 12 | importFrom(utils,download.file) 13 | importFrom(utils,read.table) 14 | useDynLib(sparkwarc, .registration = TRUE) 15 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # sparkwarc 0.1.6 2 | 3 | - Edgar Ruiz (https://github.com/edgararuiz) will be the new maintainer of this 4 | package moving forward. 5 | 6 | # sparkwarc 0.1.5 7 | 8 | - `sparkwarc` is a `sparklyr` extension for loading WARC (Web ARChive) files to Apache Spark. 
9 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | rcpp_read_warc <- function(path, filter, include) { 5 | .Call(`_sparkwarc_rcpp_read_warc`, path, filter, include) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /R/commoncrawl.R: -------------------------------------------------------------------------------- 1 | #' Provides WARC paths for commoncrawl.org 2 | #' 3 | #' Provides WARC paths for commoncrawl.org. To be used with 4 | #' \code{spark_read_warc}. 5 | #' 6 | #' @param start The first path to retrieve. 7 | #' @param end The last path to retrieve. 8 | #' 9 | #' @examples 10 | #' 11 | #' cc_warc(1) 12 | #' cc_warc(2, 3) 13 | #' 14 | #' @export 15 | #' @importFrom utils read.table 16 | cc_warc <- function(start, end = start) { 17 | warcPathsFile <- system.file("samples/sample.warc.paths", package = "sparkwarc") 18 | warcPaths <- read.table(warcPathsFile) 19 | paste(warcPaths[seq(start, end), ], collapse = ",") 20 | } 21 | -------------------------------------------------------------------------------- /R/dependencies.R: -------------------------------------------------------------------------------- 1 | spark_dependencies <- function(spark_version, scala_version, ...) 
{ 2 | sparklyr::spark_dependency( 3 | jars = c( 4 | system.file( 5 | sprintf("java/sparkwarc-%s-%s.jar", spark_version, scala_version), 6 | package = "sparkwarc" 7 | ) 8 | ), 9 | packages = c( 10 | ) 11 | ) 12 | } 13 | 14 | #' @import sparklyr 15 | .onLoad <- function(libname, pkgname) { 16 | sparklyr::register_extension(pkgname) 17 | } 18 | 19 | .onUnload <- function(libpath) { 20 | library.dynam.unload("sparkwarc", libpath) 21 | } 22 | -------------------------------------------------------------------------------- /R/package.R: -------------------------------------------------------------------------------- 1 | #' sparkwarc 2 | #' 3 | #' Sparklyr extension for loading WARC Files into Apache Spark 4 | #' 5 | #' @docType package 6 | #' @import Rcpp 7 | #' @name sparkwarc 8 | NULL 9 | -------------------------------------------------------------------------------- /R/sample.R: -------------------------------------------------------------------------------- 1 | #' Retrieves sample warc path 2 | #' 3 | #' @export 4 | spark_warc_sample_path <- function() { 5 | normalizePath(system.file("samples/sample.warc.gz", package = "sparkwarc")) 6 | } 7 | 8 | #' Loads the sample warc file in Rcpp 9 | #' 10 | #' @param filter A regular expression used to filter to each warc entry 11 | #' efficiently by running native code using \code{Rcpp}. 12 | #' @param include A regular expression used to keep only matching lines 13 | #' efficiently by running native code using \code{Rcpp}. 14 | #' 15 | #' @export 16 | rcpp_read_warc_sample <- function(filter = "", include = "") { 17 | sample_warc <- spark_warc_sample_path() 18 | 19 | rcpp_read_warc(sample_warc, filter, include) 20 | } 21 | 22 | #' Loads the sample warc file in Spark 23 | #' 24 | #' @param sc An active \code{spark_connection}. 25 | #' @param filter A regular expression used to filter to each warc entry 26 | #' efficiently by running native code using \code{Rcpp}. 
#' @param include A regular expression used to keep only matching lines
#'   efficiently by running native code using \code{Rcpp}.
#'
#' @return The value returned by \code{spark_read_warc()} for the sample file.
#'
#' @export
spark_read_warc_sample <- function(sc, filter = "", include = "") {
  sample_warc <- spark_warc_sample_path()

  # spark_read_warc() has no `filter` / `include` parameters: passed under
  # those names they are absorbed by its unused `...` and silently ignored.
  # Forward them under the names it actually reads (spark_rcpp_read_warc()
  # maps match_warc -> filter and match_line -> include).
  spark_read_warc(
    sc,
    name = "sample_warc",
    path = sample_warc,
    overwrite = TRUE,
    match_warc = filter,
    match_line = include
  )
}

--------------------------------------------------------------------------------
/R/sparkwarc.R:
--------------------------------------------------------------------------------
#' Reads a WARC File into Apache Spark
#'
#' Reads a WARC (Web ARChive) file into Apache Spark using sparklyr.
#'
#' @param sc An active \code{spark_connection}.
#' @param name The name to assign to the newly generated table.
#' @param path The path to the file. Needs to be accessible from the cluster.
#'   Supports the \samp{"hdfs://"}, \samp{"s3n://"} and \samp{"file://"} protocols.
#' @param repartition The number of partitions used to distribute the
#'   generated table. Use 0 (the default) to avoid partitioning.
#' @param memory Boolean; should the data be loaded eagerly into memory? (That
#'   is, should the table be cached?)
#' @param overwrite Boolean; overwrite the table with the given name if it
#'   already exists?
#' @param match_warc Include only warc files matching this character string.
#' @param match_line Include only lines matching this character string.
#' @param parser Which parser implementation to use? Options are "scala"
#'   or "r" (default).
#' @param ... Additional arguments reserved for future use.
#'
#' @return The result of \code{sdf_register()} for the table registered under
#'   \code{name}.
#'
#' @examples
#'
#' \dontrun{
#' library(sparklyr)
#' library(sparkwarc)
#' sc <- spark_connect(master = "local")
#' sdf <- spark_read_warc(
#'   sc,
#'   name = "sample_warc",
#'   path = system.file(file.path("samples", "sample.warc"), package = "sparkwarc"),
#'   memory = FALSE,
#'   overwrite = FALSE
#' )
#'
#' spark_disconnect(sc)
#' }
#'
#' @import DBI
#' @importFrom utils download.file
#' @export
spark_read_warc <- function(sc,
                            name,
                            path,
                            repartition = 0L,
                            memory = TRUE,
                            overwrite = TRUE,
                            match_warc = "",
                            match_line = "",
                            parser = c("r", "scala"),
                            ...) {
  # Resolve and validate `parser` before touching any Spark state, so that an
  # invalid value cannot drop an existing table. NULL and the untouched default
  # vector c("r", "scala") both select the first choice, "r"; comparing the
  # length-2 default directly in `&&` / `||` is an error on R >= 4.3.
  valid_parsers <- c("r", "scala")
  if (is.null(parser) || identical(parser, valid_parsers)) {
    parser <- valid_parsers[[1]]
  }
  if (!is.character(parser) || length(parser) != 1L ||
      !parser %in% valid_parsers) {
    stop("Invalid 'parser' value, must be 'r' or 'scala'")
  }

  # Treat a plain numeric 0 exactly like the documented default 0L.
  repartition <- as.integer(repartition)

  if (overwrite && name %in% dbListTables(sc)) {
    dbRemoveTable(sc, name)
  }

  if (parser == "r") {
    # `path` may hold several comma-separated paths; each becomes one row.
    paths_df <- data.frame(paths = strsplit(path, ",")[[1]])

    # Without an explicit repartition, use one partition per path.
    path_repartition <- if (identical(repartition, 0L)) {
      nrow(paths_df)
    } else {
      repartition
    }

    paths_tbl <- sdf_copy_to(
      sc,
      paths_df,
      name = "sparkwarc_paths",
      overwrite = TRUE,
      repartition = as.integer(path_repartition)
    )

    df <- spark_apply(paths_tbl, function(df) {
      entries <- apply(df, 1, function(path) {
        spark_apply_log("is processing warc path ", path)

        if (grepl("s3n://", path)) {
          # Sys.which() returns "" when the executable is not on the PATH.
          aws_enabled <- nzchar(Sys.which("aws")[[1]])
          temp_warc <- tempfile(fileext = ".warc.gz")
          # Remove the downloaded copy even if reading it fails below.
          on.exit(unlink(temp_warc), add = TRUE)

          if (aws_enabled) {
            spark_apply_log("is downloading warc file using aws")
            path <- sub("s3n://", "s3://", path)

            system2("aws", c("s3", "cp", path, temp_warc))
          } else {
            spark_apply_log("is downloading warc file using download.file")

            path <- sub(
              "s3n://commoncrawl/",
              "https://commoncrawl.s3.amazonaws.com/",
              path
            )
            download.file(url = path, destfile = temp_warc)
          }

          path <- temp_warc
          spark_apply_log("finished downloading warc file")
        }

        spark_rcpp_read_warc(path, match_warc, match_line)
      })

      if (nrow(df) > 1) do.call("rbind", entries) else data.frame(entries)
    }, columns = c(
      tags = "double",
      content = "character"
    )) %>% spark_dataframe()
  } else {
    if (nchar(match_warc) > 0) stop("Scala parser does not support 'match_warc'")

    df <- sparklyr::invoke_static(
      sc,
      "SparkWARC.WARC",
      "parse",
      spark_context(sc),
      path,
      match_line,
      as.integer(repartition)
    )
  }

  result_tbl <- sdf_register(df, name)

  if (memory) {
    quoted_name <- DBI::dbQuoteIdentifier(sc, name)
    dbGetQuery(sc, paste("CACHE TABLE", quoted_name))
    # Run a count over the table right after the CACHE TABLE statement.
    dbGetQuery(sc, paste("SELECT count(*) FROM", quoted_name))
  }

  result_tbl
}

#' Reads a WARC File using Rcpp
#'
#' Reads a WARC (Web ARChive) file using Rcpp.
#'
#' @param path The path to the file. Needs to be accessible from the cluster.
#'   Supports the \samp{"hdfs://"}, \samp{"s3n://"} and \samp{"file://"} protocols.
#' @param match_warc Include only warc files matching this character string.
#' @param match_line Include only lines matching this character string.
138 | #' 139 | #' @useDynLib sparkwarc, .registration = TRUE 140 | #' 141 | #' @export 142 | spark_rcpp_read_warc <- function(path, match_warc, match_line) { 143 | rcpp_read_warc(path, filter = match_warc, include = match_line) 144 | } 145 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "sparkwarc - WARC files in sparklyr" 3 | output: 4 | github_document: 5 | fig_width: 9 6 | fig_height: 5 7 | --- 8 | 9 | # Install 10 | 11 | Install using with: 12 | 13 | ```{r eval=FALSE} 14 | devtools::install_github("javierluraschi/sparkwarc") 15 | ``` 16 | 17 | # Intro 18 | 19 | The following example loads a very small subset of a WARC file from [Common Crawl](http://commoncrawl.org), a nonprofit 501 organization that crawls the web and freely provides its archives and datasets to the public. 20 | 21 | ```{r message=FALSE} 22 | library(sparkwarc) 23 | library(sparklyr) 24 | library(DBI) 25 | library(dplyr) 26 | ``` 27 | 28 | ```{r connect-1, max.print=10} 29 | sc <- spark_connect(master = "local") 30 | ``` 31 | 32 | ```{r load-sample} 33 | spark_read_warc(sc, path = spark_warc_sample_path(), name = "WARC") 34 | ``` 35 | 36 | ```{sql query-1, connection=sc, max.print=1} 37 | SELECT count(value) 38 | FROM WARC 39 | WHERE length(regexp_extract(value, ' 0 40 | ``` 41 | 42 | ```{r functions-1} 43 | cc_regex <- function(ops) { 44 | ops %>% 45 | filter(regval != "") %>% 46 | group_by(regval) %>% 47 | summarize(count = n()) %>% 48 | arrange(desc(count)) %>% 49 | head(100) 50 | } 51 | 52 | cc_stats <- function(regex) { 53 | tbl(sc, "warc") %>% 54 | transmute(regval = regexp_extract(value, regex, 1)) %>% 55 | cc_regex() 56 | } 57 | ``` 58 | 59 | ```{r query-2} 60 | cc_stats("http-equiv=\"Content-Language\" content=\"(.*)\"") 61 | ``` 62 | 63 | ```{r query-3} 64 | cc_stats(" 76 | 77 | 78 | 79 | 82 | 83 | 86 | 87 | 88 |
89 |
90 | 91 | 92 |

+ 38 048 738 08 08

93 | 94 | 104 | 105 |   106 | 107 |
108 | 124 |   125 |   126 |
127 |
128 |
129 | 133 | 134 | 135 | 136 | 137 | 138 |
139 | 140 | 141 |
142 |
143 |    144 |
145 |

Foxy, спортивный клуб «Контакты»

Фокси
146 |    147 |
148 |
149 |
150 | 151 | 152 |
153 |
154 | 155 | 156 | 157 | 158 |
159 |
160 |
161 |
162 |
163 |    164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 |
АдресТелефонРежим работы
г. Одесса, Ул. Прохоровская, 12, 2 этаж0936248218
г. Одесса, Ул. Бочарова, 440674826371, 7714136
181 |    182 |
183 |
184 | 185 | 186 | 187 | 188 | 189 |
190 |

Просмотров Foxy, спортивный клуб за 30 дней: 70
191 | Просмотров Foxy, спортивный клуб с момента добавления: 6596

192 |
193 | 201 |    202 |
203 | 204 | 205 | 206 |
 
207 |
208 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | WARC/1.0 287 | WARC-Type: metadata 288 | WARC-Date: 2016-12-11T14:00:57Z 289 | WARC-Record-ID: 290 | Content-Length: 20 291 | Content-Type: application/warc-fields 292 | WARC-Warcinfo-ID: 293 | WARC-Concurrent-To: 294 | WARC-Target-URI: http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts 295 | 296 | fetchTimeMs: 546 297 | 298 | 299 | 300 | WARC/1.0 301 | WARC-Type: request 302 | WARC-Date: 2016-12-11T14:08:53Z 303 | WARC-Record-ID: 304 | Content-Length: 274 305 | Content-Type: application/http; msgtype=request 306 | WARC-Warcinfo-ID: 307 | WARC-IP-Address: 213.155.18.48 308 | WARC-Target-URI: http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy 309 | 310 | GET /legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy HTTP/1.0 311 | Host: 08.od.ua 312 | Accept-Encoding: x-gzip, gzip, deflate 313 | User-Agent: CCBot/2.0 (http://commoncrawl.org/faq/) 314 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 315 | 316 | 317 | 318 | WARC/1.0 319 | WARC-Type: response 320 | WARC-Date: 2016-12-11T14:08:53Z 321 | WARC-Record-ID: 322 | Content-Length: 18697 323 | Content-Type: application/http; msgtype=response 324 | WARC-Warcinfo-ID: 325 | WARC-Concurrent-To: 326 | WARC-IP-Address: 213.155.18.48 327 | WARC-Target-URI: http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy 328 | WARC-Payload-Digest: sha1:7JHU6APXI3HCJJBOG5QBKK5FV2IHYVBB 329 | WARC-Block-Digest: sha1:FFICQINSVMNO5O5VOZ5QLIXXVUIXLGNX 330 | WARC-Truncated: length 331 | 332 | HTTP/1.1 200 OK 333 | Set-Cookie: design=default; expires=Mon, 11-Dec-2017 13:22:06 GMT; path=/ 334 | Connection: close 335 | Server: nginx/1.4.4 336 | Vary: Accept-Encoding 337 | Date: Sun, 11 Dec 2016 13:22:06 GMT 338 | Content-Type: text/html; 
charset=UTF-8 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | Регистрация-ликвидация предприятий, Легал, юридическая компания в Одессе, - Регистрация-ликвидация предприятий 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 362 | 363 | 366 | 367 | 368 |
369 |
370 | 371 | 372 |

+ 38 048 738 08 08

373 | 374 | 384 | 385 |   386 | 387 |
388 | 404 |   405 |   406 |
407 |
408 |
409 | 413 | 414 | 415 | 416 | 417 | 418 |
419 | 420 | 421 |
422 |
423 |    424 |
425 |

Легал, юридическая компания → Товар «Регистрация-ликвидация предприятий»

426 |    427 |
428 |
429 |
430 | 431 | 432 |
433 |
434 | 435 | 436 | 437 | 438 |
439 |
440 |
441 |
442 |
443 |    444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 |
АдресТелефонРежим работы
г. Одесса, Ул. Генерала ватутина, 197018871
456 |    457 |
458 |
459 | 460 | 461 | 462 | 463 |
464 | 470 | 471 |
472 |    473 |

Прайс-листы

474 | 477 |    478 |
479 |
480 | 481 |
482 |    483 |
484 |
485 |

Регистрация-ликвидация предприятий

486 |
487 |

488 |

489 |
Регистрация и ликвидация предприятий
490 |
491 | 492 |
493 |    494 |
495 |
496 |
497 | 498 |
499 |
500 |
501 |
    502 |
  • Регистрация-ликвидация предприятий
  • 503 |
  • Количество *0.00 грн.
  • 504 |
  • Фамилия *
  • 505 |
  • Имя *
  • 506 |
  • Отчество *
  • 507 |
  • Город *
  • 508 |
  • Адрес (улица, № дома/квартиры) *
  • 509 |
  • Номер телефона (+380931111111) *
  • 510 |
  • E-mail *
  • 511 |
  • Желаемая дата и время доставки
  • 512 |
  • 513 |
    514 | 515 |
    516 | Коментарии 517 |
  • 518 | 519 |
520 |
521 |
522 |
523 |

ОФОРМИТЬ ЗАКАЗ

524 |
525 | 526 | 527 | 528 | 529 |
530 |
531 | 532 | 533 |
534 |

Просмотров Легал, юридическая компания за 30 дней: 10
535 | Просмотров Легал, юридическая компания с момента добавления: 1353

536 |
537 | 545 |    546 |
547 | 548 | 549 | 550 |
 
551 |
552 | 573 | 574 | 575 | 576 | 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 593 | 594 | 595 | 596 | 636 | 637 | 638 | 639 | 640 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | WARC/1.0 672 | WARC-Type: metadata 673 | WARC-Date: 2016-12-11T14:08:53Z 674 | WARC-Record-ID: 675 | Content-Length: 20 676 | Content-Type: application/warc-fields 677 | WARC-Warcinfo-ID: 678 | WARC-Concurrent-To: 679 | WARC-Target-URI: http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy 680 | 681 | fetchTimeMs: 507 682 | 683 | 684 | 685 | WARC/1.0 686 | WARC-Type: request 687 | WARC-Date: 2016-12-11T14:06:26Z 688 | WARC-Record-ID: 689 | Content-Length: 294 690 | Content-Type: application/http; msgtype=request 691 | WARC-Warcinfo-ID: 692 | WARC-IP-Address: 213.155.18.48 693 | WARC-Target-URI: http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya 694 | 695 | GET /produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya HTTP/1.0 696 | Host: 08.od.ua 697 | Accept-Encoding: x-gzip, gzip, deflate 698 | User-Agent: CCBot/2.0 (http://commoncrawl.org/faq/) 699 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 700 | 701 | 702 | 703 | WARC/1.0 704 | WARC-Type: response 705 | WARC-Date: 2016-12-11T14:06:26Z 706 | WARC-Record-ID: 707 | Content-Length: 15642 708 | Content-Type: application/http; msgtype=response 709 | WARC-Warcinfo-ID: 710 | WARC-Concurrent-To: 711 | WARC-IP-Address: 213.155.18.48 712 | WARC-Target-URI: http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya 713 | WARC-Payload-Digest: sha1:PWJTTHLLGF34EXCSND67NBAXD52BGKCM 714 | WARC-Block-Digest: sha1:3ZZRIL2JIZOAZT6QQIMBKPSC4FATWFHI 715 | WARC-Truncated: length 716 | 717 | HTTP/1.1 200 OK 718 | Set-Cookie: design=default; expires=Mon, 11-Dec-2017 13:19:40 GMT; path=/ 719 | Connection: close 720 | Server: 
nginx/1.4.4 721 | Vary: Accept-Encoding 722 | Date: Sun, 11 Dec 2016 13:19:40 GMT 723 | Content-Type: text/html; charset=UTF-8 724 | 725 | 726 | 727 | 728 | 729 | 730 | 731 | 732 | Вип Степ, дистрибьютерская компания органических продуктов питания в Одессе, Продукты питания - Специи 733 | 734 | 735 | 736 | 737 | 738 | 739 | 740 | 741 | 742 | 743 | 744 | 747 | 748 | 751 | 752 | 753 |
754 |
755 | 756 | 757 |

+ 38 048 738 08 08

758 | 759 | 769 | 770 |   771 | 772 |
773 | 789 |   790 |   791 |
792 |
793 |
794 | 798 | 799 | 800 | 801 | 802 | 803 |
804 | 805 | 806 |
807 |
808 |    809 |
810 |

Вип Степ, дистрибьютерская компания органических продуктов питания → «Специи»

811 |    812 |
813 |
814 |
815 | 816 | 817 |
818 |
819 |    820 | 821 | 822 | 823 | 824 | 825 | 826 | 827 | 828 | 829 | 830 | 831 |
АдресТелефонРежим работы
г. Одесса, Ул. Троицкая, 33а7223015
832 |
833 | 834 | 835 | 838 | 839 |
836 |

Веб-сайт:
837 | organico.od.ua

840 |
841 |    842 |
843 |
844 | 845 | 846 | 847 |
848 |
849 | Специи 850 |
851 |
852 |    853 |

Соусы

854 |    855 |
856 | 857 |
858 | 864 |
865 | 866 |
867 |    868 |

Прайс-листы

869 | 875 |    876 |
877 | 878 |
879 |
880 | 881 | 882 | 883 |
   884 |
885 |
886 | 887 |
888 | 889 | 890 |
891 |

Просмотров Вип Степ, дистрибьютерская компания органических продуктов питания за 30 дней: 66
892 | Просмотров Вип Степ, дистрибьютерская компания органических продуктов питания с момента добавления: 3166

893 |
894 | 902 |    903 |
904 | 905 | 906 | 907 |
 
908 |
909 | 930 | 931 | 932 | 933 | 934 | 935 | 936 | 937 | 938 | 939 | 940 | 941 | 942 | 943 | 950 | 951 | 952 | 953 | 954 | 955 | 956 | 980 | 981 | 982 | 983 | 984 | 985 | 986 | 987 | WARC/1.0 988 | WARC-Type: metadata 989 | WARC-Date: 2016-12-11T14:06:26Z 990 | WARC-Record-ID: 991 | Content-Length: 20 992 | Content-Type: application/warc-fields 993 | WARC-Warcinfo-ID: 994 | WARC-Concurrent-To: 995 | WARC-Target-URI: http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya 996 | 997 | fetchTimeMs: 519 998 | 999 | 1000 | 1001 | WARC/1.0 1002 | WARC-Type: request 1003 | WARC-Date: 2016-12-11T13:54:37Z 1004 | WARC-Record-ID: 1005 | Content-Length: 244 1006 | Content-Type: application/http; msgtype=request 1007 | WARC-Warcinfo-ID: 1008 | WARC-IP-Address: 213.155.18.48 1009 | WARC-Target-URI: http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo 1010 | 1011 | GET /stroymaterialy/pilomaterialy/les_ooo_lis_ooo HTTP/1.0 1012 | Host: 08.od.ua 1013 | Accept-Encoding: x-gzip, gzip, deflate 1014 | User-Agent: CCBot/2.0 (http://commoncrawl.org/faq/) 1015 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 1016 | 1017 | 1018 | 1019 | WARC/1.0 1020 | WARC-Type: response 1021 | WARC-Date: 2016-12-11T13:54:37Z 1022 | WARC-Record-ID: 1023 | Content-Length: 16042 1024 | Content-Type: application/http; msgtype=response 1025 | WARC-Warcinfo-ID: 1026 | WARC-Concurrent-To: 1027 | WARC-IP-Address: 213.155.18.48 1028 | WARC-Target-URI: http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo 1029 | WARC-Payload-Digest: sha1:FMPEPMTWKD3YKRF76EUODBKHCY4VAABP 1030 | WARC-Block-Digest: sha1:YQICPN5F5IALIV3X2ND4LOU5TIHAMZPJ 1031 | WARC-Truncated: length 1032 | 1033 | HTTP/1.1 200 OK 1034 | Set-Cookie: design=default; expires=Mon, 11-Dec-2017 13:07:50 GMT; path=/ 1035 | Connection: close 1036 | Server: nginx/1.4.4 1037 | Vary: Accept-Encoding 1038 | Date: Sun, 11 Dec 2016 13:07:50 GMT 1039 | Content-Type: 
text/html; charset=UTF-8 1040 | 1041 | 1042 | 1043 | 1044 | 1045 | 1046 | 1047 | 1048 | Лес, ооо (лис, ооо) в Одессе, Стройматериалы - Пиломатериалы 1049 | 1050 | 1051 | 1052 | 1053 | 1054 | 1055 | 1056 | 1057 | 1058 | 1059 | 1060 | 1063 | 1064 | 1067 | 1068 | 1069 |
1070 |
1071 | 1072 | 1073 |

+ 38 048 738 08 08

1074 | 1075 | 1085 | 1086 |   1087 | 1088 |
1089 | 1105 |   1106 |   1107 |
1108 |
1109 |
1110 | 1114 | 1115 | 1116 | 1117 | 1118 | 1119 |
1120 | 1121 | 1122 |
1123 |
1124 |    1125 |
1126 |

Лес, ооо (лис, ооо) → «Пиломатериалы»

1127 |    1128 |
1129 |
1130 |
1131 | 1132 | 1133 |
1134 |
1135 |    1136 | 1137 | 1138 | 1139 | 1140 | 1141 | 1142 | 1143 | 1144 | 1145 | 1146 | 1147 |
АдресТелефонРежим работы
г. Одесса, Ул. Бугаевская, 587222888Пн-сб с 8 до 17
1148 |
1149 | 1150 | 1151 | 1154 | 1155 |
1152 |

Веб-сайт:
1153 | odessa-lis.all.biz

1156 |
1157 |    1158 |
1159 |
1160 | 1161 | 1162 | 1163 |
1164 |
1165 | Пиломатериалы 1166 |
1167 |
1168 |    1169 |

Пиломатериалы:
1170 | Доска обрезная
1171 | Доска необрезная
1172 | Брус
1173 | Доска пола (сосна)

1174 |    1175 |
1176 | 1177 |
1178 | 1184 |
1185 | 1186 | 1203 | 1204 |
1205 |
1206 |    1207 | 1208 | 1209 | 1210 | 1211 | 1212 | 1213 | 1214 | 1219 | 1225 | 1226 | 1227 |
НазваниеФотоОписание
1215 | 1218 | 1220 | 1221 |
1222 | Опилки, стружка 1223 |
1224 |
1228 | 1229 | 1230 | 1231 |

   1232 |
1233 |
1234 | 1235 |
1236 | 1237 | 1238 |
1239 |

Просмотров Лес, ооо (лис, ооо) за 30 дней: 113
1240 | Просмотров Лес, ооо (лис, ооо) с момента добавления: 11849

1241 |
1242 | 1250 |    1251 |
1252 | 1253 | 1254 | 1255 |
 
1256 |
1257 | 1278 | 1279 | 1280 | 1281 | 1282 | 1283 | 1284 | 1285 | 1286 | 1287 | 1288 | 1289 | 1290 | 1291 | 1298 | 1299 | 1300 | 1301 | 1302 | 1303 | 1304 | 1328 | 1329 | 1330 | 1331 | 1332 | 1333 | 1334 | 1335 | WARC/1.0 1336 | WARC-Type: metadata 1337 | WARC-Date: 2016-12-11T13:54:37Z 1338 | WARC-Record-ID: 1339 | Content-Length: 20 1340 | Content-Type: application/warc-fields 1341 | WARC-Warcinfo-ID: 1342 | WARC-Concurrent-To: 1343 | WARC-Target-URI: http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo 1344 | 1345 | fetchTimeMs: 516 1346 | 1347 | 1348 | 1349 | WARC/1.0 1350 | WARC-Type: request 1351 | WARC-Date: 2016-12-11T14:08:36Z 1352 | WARC-Record-ID: 1353 | Content-Length: 241 1354 | Content-Type: application/http; msgtype=request 1355 | WARC-Warcinfo-ID: 1356 | WARC-IP-Address: 213.155.18.48 1357 | WARC-Target-URI: http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts 1358 | 1359 | GET /svoboda_vseukrainskoe_obedinenie/contacts HTTP/1.0 1360 | Host: 08.od.ua 1361 | Accept-Encoding: x-gzip, gzip, deflate 1362 | User-Agent: CCBot/2.0 (http://commoncrawl.org/faq/) 1363 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 1364 | 1365 | 1366 | 1367 | WARC/1.0 1368 | WARC-Type: response 1369 | WARC-Date: 2016-12-11T14:08:36Z 1370 | WARC-Record-ID: 1371 | Content-Length: 11576 1372 | Content-Type: application/http; msgtype=response 1373 | WARC-Warcinfo-ID: 1374 | WARC-Concurrent-To: 1375 | WARC-IP-Address: 213.155.18.48 1376 | WARC-Target-URI: http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts 1377 | WARC-Payload-Digest: sha1:OJQLP3IBNRTJTF3U4HEZQ24ETE2DZMCQ 1378 | WARC-Block-Digest: sha1:Q6EEMLUSOMRC77UBE34TB6TRDT6MGQD4 1379 | WARC-Truncated: length 1380 | 1381 | HTTP/1.1 200 OK 1382 | Set-Cookie: design=default; expires=Mon, 11-Dec-2017 13:21:49 GMT; path=/ 1383 | Connection: close 1384 | Server: nginx/1.4.4 1385 | Vary: Accept-Encoding 1386 | Date: Sun, 11 Dec 2016 13:21:49 GMT 1387 | Content-Type: text/html; charset=UTF-8 
1388 | 1389 | 1390 | 1391 | 1392 | 1393 | 1394 | 1395 | 1396 | Свобода, всеукраинское объединение в Одессе - Контакты 1397 | 1398 | 1399 | 1400 | 1401 | 1402 | 1403 | 1404 | 1405 | 1406 | 1407 | 1408 | 1411 | 1412 | 1415 | 1416 | 1417 |
1418 |
1419 | 1420 | 1421 |

+ 38 048 738 08 08

1422 | 1423 | 1433 | 1434 |   1435 | 1436 |
1437 | 1453 |   1454 |   1455 |
1456 |
1457 |
1458 | 1462 | 1463 | 1464 | 1465 | 1466 | 1467 |
1468 | 1469 | 1470 |
1471 |
1472 |    1473 |
1474 |

Свобода, всеукраинское объединение «Контакты»

1475 |    1476 |
1477 |
1478 |
1479 | 1480 | 1481 |
1482 |
1483 | 1484 | 1485 | 1486 | 1487 |
1488 |
1489 |
1490 |
1491 |
1492 |    1493 | 1494 | 1495 | 1496 | 1497 | 1498 | 1499 | 1500 | 1501 | 1502 | 1503 | 1504 |
АдресТелефонРежим работы
г. Одесса, Ул. Канатная, 837015463
1505 |    1506 |
1507 |
1508 | 1509 | 1510 | 1511 | 1512 | 1513 |
1514 |

Просмотров Свобода, всеукраинское объединение за 30 дней: 41
1515 | Просмотров Свобода, всеукраинское объединение с момента добавления: 2101

1516 |
1517 | 1525 |    1526 |
1527 | 1528 | 1529 | 1530 |
 
1531 |
1532 | 1553 | 1554 | 1555 | 1556 | 1557 | 1558 | 1559 | 1560 | 1561 | 1562 | 1563 | 1564 | 1565 | 1566 | 1573 | 1574 | 1575 | 1576 | 1577 | 1578 | 1579 | 1603 | 1604 | 1605 | 1606 | 1607 | 1608 | 1609 | 1610 | WARC/1.0 1611 | WARC-Type: metadata 1612 | WARC-Date: 2016-12-11T14:08:36Z 1613 | WARC-Record-ID: 1614 | Content-Length: 20 1615 | Content-Type: application/warc-fields 1616 | WARC-Warcinfo-ID: 1617 | WARC-Concurrent-To: 1618 | WARC-Target-URI: http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts 1619 | 1620 | fetchTimeMs: 506 1621 | 1622 | 1623 | 1624 | WARC/1.0 1625 | WARC-Type: request 1626 | WARC-Date: 2016-12-11T13:48:17Z 1627 | WARC-Record-ID: 1628 | Content-Length: 200 1629 | Content-Type: application/http; msgtype=request 1630 | WARC-Warcinfo-ID: 1631 | WARC-IP-Address: 184.95.38.196 1632 | WARC-Target-URI: http://0800z.be/ 1633 | 1634 | GET / HTTP/1.0 1635 | Host: 0800z.be 1636 | Accept-Encoding: x-gzip, gzip, deflate 1637 | User-Agent: CCBot/2.0 (http://commoncrawl.org/faq/) 1638 | Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8 1639 | 1640 | 1641 | 1642 | WARC/1.0 1643 | WARC-Type: response 1644 | WARC-Date: 2016-12-11T13:48:17Z 1645 | WARC-Record-ID: 1646 | Content-Length: 2901 1647 | Content-Type: application/http; msgtype=response 1648 | WARC-Warcinfo-ID: 1649 | WARC-Concurrent-To: 1650 | WARC-IP-Address: 184.95.38.196 1651 | WARC-Target-URI: http://0800z.be/ 1652 | WARC-Payload-Digest: sha1:UEEEEHO3NRAHTYVPY67YKWF7QKWCRYUV 1653 | WARC-Block-Digest: sha1:E3GQ64AVXQXBSHXPJJ7X2JLZUBKYK7SX 1654 | 1655 | HTTP/1.1 200 OK 1656 | Content-Length: 2586 1657 | Expires: Tue, 13 Dec 2016 13:48:17 GMT 1658 | Connection: close 1659 | X-Powered-By: PHP/5.3.3 1660 | Server: Apache/2.2.15 (CentOS) 1661 | Cache-Control: no-cache, must-revalidate, max-age=172800 1662 | Vary: Accept-Encoding 1663 | Date: Sun, 11 Dec 2016 13:48:17 GMT 1664 | Content-Type: text/html; charset=UTF-8 1665 | 1666 | 1667 | 1668 | 1669 | 0800z.be - Domain 
parked by Europe Registry 1670 | 1671 | 1672 | 1673 | 1683 | 1684 | 1685 | 1686 | 1687 | 1703 | 1704 | 1705 | 1710 | 1711 |
1688 | 1689 | 1690 | 1700 | 1701 |
1691 |
1692 | 1693 | 1694 | 1695 | 1696 |

Domain Parked with Europe Registry

1697 |
1698 |
1699 |
1702 |
1706 | 1707 | Click here to visit the Europe Registry website. 1708 | 1709 |
1712 | 1713 | 1714 | 1715 | -------------------------------------------------------------------------------- /inst/samples/sample.warc.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-spark/sparkwarc/c3e8975ad7fb49bde159d33afc5a72d09e220b2b/inst/samples/sample.warc.gz -------------------------------------------------------------------------------- /inst/samples/sample.wat: -------------------------------------------------------------------------------- 1 | WARC/1.0 2 | WARC-Type: warcinfo 3 | WARC-Date: 2016-12-15T09:37:09Z 4 | WARC-Filename: CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz 5 | WARC-Record-ID: 6 | Content-Type: application/warc-fields 7 | Content-Length: 108 8 | 9 | Software-Info: ia-web-commons.1.0-SNAPSHOT-20161209033514 10 | Extracted-Date: Thu, 15 Dec 2016 09:37:09 GMT 11 | 12 | 13 | 14 | WARC/1.0 15 | WARC-Type: metadata 16 | WARC-Target-URI: CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz 17 | WARC-Date: 2016-12-13T03:16:04Z 18 | WARC-Record-ID: 19 | WARC-Refers-To: 20 | Content-Type: application/json 21 | Content-Length: 1152 22 | 23 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"273","Block-Digest":"sha1:4IFONO3B2NA4YWQKS5WDTNCVVTGPAFVA","Actual-Content-Length":"344","WARC-Header-Metadata":{"WARC-Type":"warcinfo","WARC-Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz","WARC-Date":"2016-12-13T03:16:04Z","Content-Length":"344","WARC-Record-ID":"","Content-Type":"application/warc-fields"},"Payload-Metadata":{"Trailing-Slop-Length":"0","Actual-Content-Type":"application/warc-fields","Actual-Content-Length":"344","Headers-Corrupt":true,"WARC-Info-Metadata":{"robots":"classic","software":"Nutch 1.6 (CC)/CC WarcExport 1.0","description":"Wide crawl of the web for November 2016","hostname":"ip-10-31-129-80.ec2.internal","format":"WARC File Format 1.0","isPartOf":"CC-MAIN-2016-50","operator":"CommonCrawl 
Admin","publisher":"CommonCrawl"}}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"433","Header-Length":"10","Inflated-CRC":"-1950615864","Inflated-Length":"621"},"Offset":"0","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 24 | 25 | WARC/1.0 26 | WARC-Type: metadata 27 | WARC-Target-URI: http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts 28 | WARC-Date: 2016-12-11T14:00:57Z 29 | WARC-Record-ID: 30 | WARC-Refers-To: 31 | Content-Type: application/json 32 | Content-Length: 1368 33 | 34 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"369","Block-Digest":"sha1:KWY63CB25DR6WC3F4F2JRIWWYNBLNSXB","Actual-Content-Length":"235","WARC-Header-Metadata":{"WARC-Type":"request","WARC-Date":"2016-12-11T14:00:57Z","WARC-Warcinfo-ID":"","Content-Length":"235","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts","WARC-IP-Address":"213.155.18.48","Content-Type":"application/http; msgtype=request"},"Payload-Metadata":{"Trailing-Slop-Length":"4","HTTP-Request-Metadata":{"Headers":{"Host":"08.od.ua","Accept-Encoding":"x-gzip, gzip, deflate","User-Agent":"CCBot/2.0 (http://commoncrawl.org/faq/)","Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},"Headers-Length":"233","Entity-Length":"0","Entity-Trailing-Slop-Bytes":"0","Request-Message":{"Method":"GET","Version":"HTTP/1.0","Path":"/foxy_foksi_sportivnyy_klub/contacts"},"Entity-Digest":"sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ"},"Actual-Content-Type":"application/http; msgtype=request"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"414","Header-Length":"10","Inflated-CRC":"-844740052","Inflated-Length":"608"},"Offset":"433","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 35 | 36 | WARC/1.0 37 | WARC-Type: metadata 38 | WARC-Target-URI: http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts 39 | WARC-Date: 2016-12-11T14:00:57Z 40 | 
WARC-Record-ID: 41 | WARC-Refers-To: 42 | Content-Type: application/json 43 | Content-Length: 6006 44 | 45 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"584","Block-Digest":"sha1:DCVA76BB5FHOPJ6CDQZM6XS3VID6K7X5","Actual-Content-Length":"11436","WARC-Header-Metadata":{"WARC-Type":"response","WARC-Truncated":"length","WARC-Date":"2016-12-11T14:00:57Z","WARC-Warcinfo-ID":"","Content-Length":"11436","WARC-Record-ID":"","WARC-Block-Digest":"sha1:DCVA76BB5FHOPJ6CDQZM6XS3VID6K7X5","WARC-Payload-Digest":"sha1:PF3NBPEM3K7Y7HA3N7NA4EPHDZURGMLV","WARC-Target-URI":"http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts","WARC-IP-Address":"213.155.18.48","WARC-Concurrent-To":"","Content-Type":"application/http; msgtype=response"},"Payload-Metadata":{"Trailing-Slop-Length":"4","Actual-Content-Type":"application/http; msgtype=response","HTTP-Response-Metadata":{"Headers":{"Date":"Sun, 11 Dec 2016 13:14:10 GMT","Vary":"Accept-Encoding","Set-Cookie":"design=default; expires=Mon, 11-Dec-2017 13:14:10 GMT; path=/","Content-Type":"text/html; charset=UTF-8","Connection":"close","Server":"nginx/1.4.4"},"Headers-Length":"234","Entity-Length":"11202","Entity-Trailing-Slop-Bytes":"0","Response-Message":{"Status":"200","Version":"HTTP/1.1","Reason":"OK"},"HTML-Metadata":{"Links":[{"text":"Справочная Одессы","title":"Справочная Одессы","path":"A@/href","url":"/"},{"path":"FORM@/action","url":"/poisk/"},{"alt":"","path":"IMG@/src","url":"/design/default/images/loader.gif"},{"text":"О фирме","title":"Foxy, спортивный клуб в Одессе","path":"A@/href","url":"/foxy_foksi_sportivnyy_klub/main"},{"text":"Каталог продукции","title":"Foxy, спортивный клуб - Каталог продукции в Одессе","path":"A@/href","url":"/foxy_foksi_sportivnyy_klub"},{"text":"Прайс-листы","title":"Цены в Foxy, спортивный клуб - Предприятия Одессы","path":"A@/href","url":"/sport_pohod_rybalka/tancy_sovremennye/foxy_foksi_sportivnyy_klub"},{"text":"Контакты","title":"Карта 
Одесса","path":"A@/href","url":"/foxy_foksi_sportivnyy_klub/contacts"},{"text":"О фирме","title":"Foxy, спортивный клуб в Одессе","path":"A@/href","url":"/foxy_foksi_sportivnyy_klub/main"},{"text":"Каталог продукции","title":"Foxy, спортивный клуб - Каталог продукции в Одессе","path":"A@/href","url":"/foxy_foksi_sportivnyy_klub"},{"text":"Прайс-листы","title":"Цены в Foxy, спортивный клуб - Предприятия Одессы","path":"A@/href","url":"/sport_pohod_rybalka/tancy_sovremennye/foxy_foksi_sportivnyy_klub"},{"text":"Контакты","title":"Одесса карта проезда","path":"A@/href","url":"/foxy_foksi_sportivnyy_klub/contacts"},{"text":"http://08.od.ua","title":"Одесса сайт","path":"A@/href","url":"/"},{"alt":"bigmir)net TOP 100","path":"IMG@/src","url":"http://c.bigmir.net/?v16840041&s16839405&t21"},{"alt":"Rambler's Top100","path":"IMG@/src","url":"/design/default/images/rambler.gif"},{"alt":"LiveInternet","path":"IMG@/src","url":"http://counter.yadro.ru/hit?t44.6;uhttp%3A%2F%2F08.od.ua%2Ffoxy_foksi_sportivnyy_klub%2Fcontacts"},{"alt":"hit.ua","path":"IMG@/src","url":"http://c.hit.ua/hit?i=107467&g=0&x=2&s=1&c=1&t=-120&r=&u=http%3A%2F%2F08.od.ua%2Ffoxy_foksi_sportivnyy_klub%2Fcontacts"},{"alt":"","path":"IMG@/src","url":"http://counter.rambler.ru/top100.scn?1919603"},{"alt":"","path":"IMG@/src","url":"//mc.yandex.ru/watch/19895821"},{"path":"A@/href","url":"https://plus.google.com/108905381149799728781"}],"Head":{"Link":[{"path":"LINK@/href","rel":"icon","type":"image/x-icon","url":"/design/default/images/spravochnaya.ico"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/coners.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/buttons.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/styles.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/lytebox.css"}],"Scripts":[{"path":"SCRIPT@/src","type
":"text/javascript","url":"/design/default/js/restyle/lytebox.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"https://apis.google.com/js/plusone.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/08.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-1.4.2.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.formtips.1.2.2.packed.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-ui-1.7.2.custom.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.equalHeight.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/start.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"http://www.google-analytics.com/ga.js"}],"Metas":[{"content":"text/html; charset=UTF-8","http-equiv":"Content-Type"},{"content":"ru-RU","http-equiv":"Content-Language"},{"content":"Foxy, спортивный клуб в Одессе - Контакты","name":"description"},{"content":"Foxy, спортивный клуб, Одесса","name":"keywords"},{"content":"initial-scale=1.0, user-scalable=no","name":"viewport"}],"Title":"Foxy, спортивный клуб в Одессе - Контакты"}},"Entity-Digest":"sha1:PF3NBPEM3K7Y7HA3N7NA4EPHDZURGMLV"}}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"4100","Header-Length":"10","Inflated-CRC":"-225326031","Inflated-Length":"12024"},"Offset":"847","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 46 | 47 | WARC/1.0 48 | WARC-Type: metadata 49 | WARC-Target-URI: http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts 50 | WARC-Date: 2016-12-11T14:00:57Z 51 | WARC-Record-ID: 52 | WARC-Refers-To: 53 | Content-Type: application/json 54 | Content-Length: 1069 55 | 56 | 
{"Envelope":{"Format":"WARC","WARC-Header-Length":"396","Block-Digest":"sha1:ZPDPEJVIT7WFP2MPZQUI4G2ZBTXGBI3I","Actual-Content-Length":"20","WARC-Header-Metadata":{"WARC-Type":"metadata","WARC-Date":"2016-12-11T14:00:57Z","WARC-Warcinfo-ID":"","Content-Length":"20","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/foxy_foksi_sportivnyy_klub/contacts","WARC-Concurrent-To":"","Content-Type":"application/warc-fields"},"Payload-Metadata":{"Trailing-Slop-Length":"4","WARC-Metadata-Metadata":{"Trailing-Slop-Length":"0","Metadata-Records":[{"Name":"fetchTimeMs","Value":"546"}],"Actual-Content-Length":"20"},"Actual-Content-Type":"application/metadata-fields"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"312","Header-Length":"10","Inflated-CRC":"-1693307385","Inflated-Length":"420"},"Offset":"4947","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 57 | 58 | WARC/1.0 59 | WARC-Type: metadata 60 | WARC-Target-URI: http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy 61 | WARC-Date: 2016-12-11T14:08:53Z 62 | WARC-Record-ID: 63 | WARC-Refers-To: 64 | Content-Type: application/json 65 | Content-Length: 1446 66 | 67 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"408","Block-Digest":"sha1:IRNXXX5UJLRQVTNUX7YD3HVXCHYUDYLC","Actual-Content-Length":"274","WARC-Header-Metadata":{"WARC-Type":"request","WARC-Date":"2016-12-11T14:08:53Z","WARC-Warcinfo-ID":"","Content-Length":"274","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy","WARC-IP-Address":"213.155.18.48","Content-Type":"application/http; msgtype=request"},"Payload-Metadata":{"Trailing-Slop-Length":"4","HTTP-Request-Metadata":{"Headers":{"Host":"08.od.ua","Accept-Encoding":"x-gzip, gzip, deflate","User-Agent":"CCBot/2.0 
(http://commoncrawl.org/faq/)","Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},"Headers-Length":"272","Entity-Length":"0","Entity-Trailing-Slop-Bytes":"0","Request-Message":{"Method":"GET","Version":"HTTP/1.0","Path":"/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy"},"Entity-Digest":"sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ"},"Actual-Content-Type":"application/http; msgtype=request"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"436","Header-Length":"10","Inflated-CRC":"397771969","Inflated-Length":"686"},"Offset":"5259","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 68 | 69 | WARC/1.0 70 | WARC-Type: metadata 71 | WARC-Target-URI: http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy 72 | WARC-Date: 2016-12-11T14:08:53Z 73 | WARC-Record-ID: 74 | WARC-Refers-To: 75 | Content-Type: application/json 76 | Content-Length: 7447 77 | 78 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"623","Block-Digest":"sha1:FFICQINSVMNO5O5VOZ5QLIXXVUIXLGNX","Actual-Content-Length":"18697","WARC-Header-Metadata":{"WARC-Type":"response","WARC-Truncated":"length","WARC-Date":"2016-12-11T14:08:53Z","WARC-Warcinfo-ID":"","Content-Length":"18697","WARC-Record-ID":"","WARC-Block-Digest":"sha1:FFICQINSVMNO5O5VOZ5QLIXXVUIXLGNX","WARC-Payload-Digest":"sha1:7JHU6APXI3HCJJBOG5QBKK5FV2IHYVBB","WARC-Target-URI":"http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy","WARC-IP-Address":"213.155.18.48","WARC-Concurrent-To":"","Content-Type":"application/http; msgtype=response"},"Payload-Metadata":{"Trailing-Slop-Length":"4","Actual-Content-Type":"application/http; msgtype=response","HTTP-Response-Metadata":{"Headers":{"Date":"Sun, 11 Dec 2016 13:22:06 GMT","Vary":"Accept-Encoding","Set-Cookie":"design=default; expires=Mon, 11-Dec-2017 13:22:06 GMT; path=/","Content-Type":"text/html; 
charset=UTF-8","Connection":"close","Server":"nginx/1.4.4"},"Headers-Length":"234","Entity-Length":"18463","Entity-Trailing-Slop-Bytes":"0","Response-Message":{"Status":"200","Version":"HTTP/1.1","Reason":"OK"},"HTML-Metadata":{"Links":[{"text":"Справочная Одессы","title":"Справочная Одессы","path":"A@/href","url":"/"},{"path":"FORM@/action","url":"/poisk/"},{"alt":"","path":"IMG@/src","url":"/design/default/images/loader.gif"},{"text":"О фирме","title":"Легал, юридическая компания в Одессе","path":"A@/href","url":"/legal_yuridicheskaya_kompaniya/main"},{"text":"Каталог продукции","title":"Легал, юридическая компания - Каталог продукции в Одессе","path":"A@/href","url":"/legal_yuridicheskaya_kompaniya"},{"text":"Прайс-листы","title":"Цены в Легал, юридическая компания - Предприятия Одессы","path":"A@/href","url":"/uslugi_raznoe/registraciya_likvidaciya_predpriyatiy/legal_yuridicheskaya_kompaniya"},{"text":"Контакты","title":"Карта Одесса","path":"A@/href","url":"/legal_yuridicheskaya_kompaniya/contacts"},{"text":"Главная","title":"Сайт г. 
Одесса","path":"A@/href","url":"/"},{"text":"Услуги (разное)","title":"Услуги (разное) в Одессе","path":"A@/href","url":"/uslugi_raznoe"},{"text":"Регистрация-ликвидация предприятий","title":"Регистрация-ликвидация предприятий Одесса","path":"A@/href","url":"/uslugi_raznoe/registraciya_likvidaciya_predpriyatiy"},{"text":"Регистрация-ликвидация предприятий (1)","title":" - Каталог товаров Одесса","path":"A@/href","url":"/uslugi_raznoe/registraciya_likvidaciya_predpriyatiy/legal_yuridicheskaya_kompaniya"},{"path":"FORM@/action","method":"post","url":""},{"text":"О фирме","title":"Легал, юридическая компания в Одессе","path":"A@/href","url":"/legal_yuridicheskaya_kompaniya/main"},{"text":"Каталог продукции","title":"Легал, юридическая компания - Каталог продукции в Одессе","path":"A@/href","url":"/legal_yuridicheskaya_kompaniya"},{"text":"Прайс-листы","title":"Цены в Легал, юридическая компания - Предприятия Одессы","path":"A@/href","url":"/uslugi_raznoe/registraciya_likvidaciya_predpriyatiy/legal_yuridicheskaya_kompaniya"},{"text":"Контакты","title":"Одесса карта проезда","path":"A@/href","url":"/legal_yuridicheskaya_kompaniya/contacts"},{"text":"http://08.od.ua","title":"Одесса сайт","path":"A@/href","url":"/"},{"alt":"bigmir)net TOP 100","path":"IMG@/src","url":"http://c.bigmir.net/?v16840041&s16839405&t21"},{"alt":"Rambler's 
Top100","path":"IMG@/src","url":"/design/default/images/rambler.gif"},{"alt":"LiveInternet","path":"IMG@/src","url":"http://counter.yadro.ru/hit?t44.6;uhttp%3A%2F%2F08.od.ua%2Flegal_yuridicheskaya_kompaniya%2Ftovar_registraciya_likvidaciya_predpriyatiy"},{"alt":"hit.ua","path":"IMG@/src","url":"http://c.hit.ua/hit?i=107467&g=0&x=2&s=1&c=1&t=-120&r=&u=http%3A%2F%2F08.od.ua%2Flegal_yuridicheskaya_kompaniya%2Ftovar_registraciya_likvidaciya_predpriyatiy"},{"alt":"","path":"IMG@/src","url":"http://counter.rambler.ru/top100.scn?1919603"},{"alt":"","path":"IMG@/src","url":"//mc.yandex.ru/watch/19895821"},{"path":"A@/href","url":"https://plus.google.com/108905381149799728781"}],"Head":{"Link":[{"path":"LINK@/href","rel":"icon","type":"image/x-icon","url":"/design/default/images/spravochnaya.ico"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/coners.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/buttons.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/styles.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/lytebox.css"}],"Scripts":[{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/lytebox.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"https://apis.google.com/js/plusone.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/08.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-1.4.2.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.formtips.1.2.2.packed.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-ui-1.7.2.custom.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.equalHeight.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/de
sign/default/js/restyle/start.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"http://www.google-analytics.com/ga.js"}],"Metas":[{"content":"text/html; charset=UTF-8","http-equiv":"Content-Type"},{"content":"ru-RU","http-equiv":"Content-Language"},{"content":"Регистрация-ликвидация предприятий, Легал, юридическая компания в Одессе, - Регистрация-ликвидация предприятий","name":"description"},{"content":"Регистрация-ликвидация предприятий, Легал, юридическая компания, Одесса, Регистрация-ликвидация предприятий","name":"keywords"},{"content":"initial-scale=1.0, user-scalable=no","name":"viewport"}],"Title":"Регистрация-ликвидация предприятий, Легал, юридическая компания в Одессе, - Регистрация-ликвидация п"}},"Entity-Digest":"sha1:7JHU6APXI3HCJJBOG5QBKK5FV2IHYVBB"}}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"5638","Header-Length":"10","Inflated-CRC":"1979387829","Inflated-Length":"19324"},"Offset":"5695","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 79 | 80 | WARC/1.0 81 | WARC-Type: metadata 82 | WARC-Target-URI: http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy 83 | WARC-Date: 2016-12-11T14:08:53Z 84 | WARC-Record-ID: 85 | WARC-Refers-To: 86 | Content-Type: application/json 87 | Content-Length: 1109 88 | 89 | 
{"Envelope":{"Format":"WARC","WARC-Header-Length":"435","Block-Digest":"sha1:CSVMGMSX34EDBU6FWV56GCCXSDL4JIBY","Actual-Content-Length":"20","WARC-Header-Metadata":{"WARC-Type":"metadata","WARC-Date":"2016-12-11T14:08:53Z","WARC-Warcinfo-ID":"","Content-Length":"20","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/legal_yuridicheskaya_kompaniya/tovar_registraciya_likvidaciya_predpriyatiy","WARC-Concurrent-To":"","Content-Type":"application/warc-fields"},"Payload-Metadata":{"Trailing-Slop-Length":"4","WARC-Metadata-Metadata":{"Trailing-Slop-Length":"0","Metadata-Records":[{"Name":"fetchTimeMs","Value":"507"}],"Actual-Content-Length":"20"},"Actual-Content-Type":"application/metadata-fields"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"333","Header-Length":"10","Inflated-CRC":"-1513167009","Inflated-Length":"459"},"Offset":"11333","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 90 | 91 | WARC/1.0 92 | WARC-Type: metadata 93 | WARC-Target-URI: http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya 94 | WARC-Date: 2016-12-11T14:06:26Z 95 | WARC-Record-ID: 96 | WARC-Refers-To: 97 | Content-Type: application/json 98 | Content-Length: 1487 99 | 100 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"428","Block-Digest":"sha1:3JQAOVDRSUD3MWMZ5JDOH7XJF2TOOP7U","Actual-Content-Length":"294","WARC-Header-Metadata":{"WARC-Type":"request","WARC-Date":"2016-12-11T14:06:26Z","WARC-Warcinfo-ID":"","Content-Length":"294","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya","WARC-IP-Address":"213.155.18.48","Content-Type":"application/http; msgtype=request"},"Payload-Metadata":{"Trailing-Slop-Length":"4","HTTP-Request-Metadata":{"Headers":{"Host":"08.od.ua","Accept-Encoding":"x-gzip, gzip, deflate","User-Agent":"CCBot/2.0 
(http://commoncrawl.org/faq/)","Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},"Headers-Length":"292","Entity-Length":"0","Entity-Trailing-Slop-Bytes":"0","Request-Message":{"Method":"GET","Version":"HTTP/1.0","Path":"/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},"Entity-Digest":"sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ"},"Actual-Content-Type":"application/http; msgtype=request"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"447","Header-Length":"10","Inflated-CRC":"651108995","Inflated-Length":"726"},"Offset":"11666","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 101 | 102 | WARC/1.0 103 | WARC-Type: metadata 104 | WARC-Target-URI: http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya 105 | WARC-Date: 2016-12-11T14:06:26Z 106 | WARC-Record-ID: 107 | WARC-Refers-To: 108 | Content-Type: application/json 109 | Content-Length: 8622 110 | 111 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"643","Block-Digest":"sha1:3ZZRIL2JIZOAZT6QQIMBKPSC4FATWFHI","Actual-Content-Length":"15642","WARC-Header-Metadata":{"WARC-Type":"response","WARC-Truncated":"length","WARC-Date":"2016-12-11T14:06:26Z","WARC-Warcinfo-ID":"","Content-Length":"15642","WARC-Record-ID":"","WARC-Block-Digest":"sha1:3ZZRIL2JIZOAZT6QQIMBKPSC4FATWFHI","WARC-Payload-Digest":"sha1:PWJTTHLLGF34EXCSND67NBAXD52BGKCM","WARC-Target-URI":"http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya","WARC-IP-Address":"213.155.18.48","WARC-Concurrent-To":"","Content-Type":"application/http; msgtype=response"},"Payload-Metadata":{"Trailing-Slop-Length":"4","Actual-Content-Type":"application/http; msgtype=response","HTTP-Response-Metadata":{"Headers":{"Date":"Sun, 11 Dec 2016 13:19:40 GMT","Vary":"Accept-Encoding","Set-Cookie":"design=default; expires=Mon, 
11-Dec-2017 13:19:40 GMT; path=/","Content-Type":"text/html; charset=UTF-8","Connection":"close","Server":"nginx/1.4.4"},"Headers-Length":"234","Entity-Length":"15408","Entity-Trailing-Slop-Bytes":"0","Response-Message":{"Status":"200","Version":"HTTP/1.1","Reason":"OK"},"HTML-Metadata":{"Links":[{"text":"Справочная Одессы","title":"Справочная Одессы","path":"A@/href","url":"/"},{"path":"FORM@/action","url":"/poisk/"},{"alt":"","path":"IMG@/src","url":"/design/default/images/loader.gif"},{"text":"О фирме","title":"Вип Степ, дистрибьютерская компания органических продуктов питания в Одессе","path":"A@/href","url":"/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya/main"},{"text":"Каталог продукции","title":"Вип Степ, дистрибьютерская компания органических продуктов питания - Каталог продукции в Одессе","path":"A@/href","url":"/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Прайс-листы","title":"Цены в Вип Степ, дистрибьютерская компания органических продуктов питания - Предприятия Одессы","path":"A@/href","url":"/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Контакты","title":"Карта Одесса","path":"A@/href","url":"/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya/contacts"},{"text":"Главная","title":"Сайт г. 
Одесса","path":"A@/href","url":"/"},{"text":"Продукты питания","title":"Продукты питания в Одессе","path":"A@/href","url":"/produkty_pitaniya"},{"text":"Специи","title":"Специи Одесса","path":"A@/href","url":"/produkty_pitaniya/specii"},{"text":"Специи (0)","title":" - Каталог товаров Одесса","path":"A@/href","url":"/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Повидло (0)","title":" - Каталог товаров Одесса","path":"A@/href","url":"/produkty_pitaniya/povidlo/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Масло сливочное, растительное (0)","title":" - Каталог товаров Одесса","path":"A@/href","url":"/produkty_pitaniya/maslo/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Соки (0)","title":" - Каталог товаров Одесса","path":"A@/href","url":"/produkty_pitaniya/soki/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"О фирме","title":"Вип Степ, дистрибьютерская компания органических продуктов питания в Одессе","path":"A@/href","url":"/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya/main"},{"text":"Каталог продукции","title":"Вип Степ, дистрибьютерская компания органических продуктов питания - Каталог продукции в Одессе","path":"A@/href","url":"/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Прайс-листы","title":"Цены в Вип Степ, дистрибьютерская компания органических продуктов питания - Предприятия Одессы","path":"A@/href","url":"/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"text":"Контакты","title":"Одесса карта проезда","path":"A@/href","url":"/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya/contacts"},{"text":"http://08.od.ua","title":"Одесса сайт","path":"A@/href","url":"/"},{"alt":"bigmir)net TOP 
100","path":"IMG@/src","url":"http://c.bigmir.net/?v16840041&s16839405&t21"},{"alt":"Rambler's Top100","path":"IMG@/src","url":"/design/default/images/rambler.gif"},{"alt":"LiveInternet","path":"IMG@/src","url":"http://counter.yadro.ru/hit?t44.6;uhttp%3A%2F%2F08.od.ua%2Fprodukty_pitaniya%2Fspecii%2Fvip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"alt":"hit.ua","path":"IMG@/src","url":"http://c.hit.ua/hit?i=107467&g=0&x=2&s=1&c=1&t=-120&r=&u=http%3A%2F%2F08.od.ua%2Fprodukty_pitaniya%2Fspecii%2Fvip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya"},{"alt":"","path":"IMG@/src","url":"http://counter.rambler.ru/top100.scn?1919603"},{"alt":"","path":"IMG@/src","url":"//mc.yandex.ru/watch/19895821"},{"path":"A@/href","url":"https://plus.google.com/108905381149799728781"}],"Head":{"Link":[{"path":"LINK@/href","rel":"icon","type":"image/x-icon","url":"/design/default/images/spravochnaya.ico"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/coners.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/buttons.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/styles.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/lytebox.css"}],"Scripts":[{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/lytebox.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"https://apis.google.com/js/plusone.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/08.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-1.4.2.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.formtips.1.2.2.packed.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-ui-1.7.2.custom.min.js"},{"path":"SCRIPT@/src","
type":"text/javascript","url":"/design/default/js/restyle/jquery.equalHeight.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/start.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"http://www.google-analytics.com/ga.js"}],"Metas":[{"content":"text/html; charset=UTF-8","http-equiv":"Content-Type"},{"content":"ru-RU","http-equiv":"Content-Language"},{"content":"Вип Степ, дистрибьютерская компания органических продуктов питания в Одессе, Продукты питания - Специи","name":"description"},{"content":"Вип Степ, дистрибьютерская компания органических продуктов питания, Одесса, Продукты питания - Специи","name":"keywords"},{"content":"initial-scale=1.0, user-scalable=no","name":"viewport"}],"Title":"Вип Степ, дистрибьютерская компания органических продуктов питания в Одессе, Продукты питания - Спец"}},"Entity-Digest":"sha1:PWJTTHLLGF34EXCSND67NBAXD52BGKCM"}}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"4561","Header-Length":"10","Inflated-CRC":"314291232","Inflated-Length":"16289"},"Offset":"12113","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 112 | 113 | WARC/1.0 114 | WARC-Type: metadata 115 | WARC-Target-URI: http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya 116 | WARC-Date: 2016-12-11T14:06:26Z 117 | WARC-Record-ID: 118 | WARC-Refers-To: 119 | Content-Type: application/json 120 | Content-Length: 1129 121 | 122 | 
{"Envelope":{"Format":"WARC","WARC-Header-Length":"455","Block-Digest":"sha1:YMKQI2FHCMOTLUNIQWFNKT4UUNTP6SIQ","Actual-Content-Length":"20","WARC-Header-Metadata":{"WARC-Type":"metadata","WARC-Date":"2016-12-11T14:06:26Z","WARC-Warcinfo-ID":"","Content-Length":"20","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/produkty_pitaniya/specii/vip_step_distribyuterskaya_kompaniya_organicheskih_produktov_pitaniya","WARC-Concurrent-To":"","Content-Type":"application/warc-fields"},"Payload-Metadata":{"Trailing-Slop-Length":"4","WARC-Metadata-Metadata":{"Trailing-Slop-Length":"0","Metadata-Records":[{"Name":"fetchTimeMs","Value":"519"}],"Actual-Content-Length":"20"},"Actual-Content-Type":"application/metadata-fields"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"340","Header-Length":"10","Inflated-CRC":"-1405909676","Inflated-Length":"479"},"Offset":"16674","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 123 | 124 | WARC/1.0 125 | WARC-Type: metadata 126 | WARC-Target-URI: http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo 127 | WARC-Date: 2016-12-11T13:54:37Z 128 | WARC-Record-ID: 129 | WARC-Refers-To: 130 | Content-Type: application/json 131 | Content-Length: 1389 132 | 133 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"378","Block-Digest":"sha1:CGLSKMVV32ABZEIWXXAUTSJQBJ3SW32M","Actual-Content-Length":"244","WARC-Header-Metadata":{"WARC-Type":"request","WARC-Date":"2016-12-11T13:54:37Z","WARC-Warcinfo-ID":"","Content-Length":"244","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo","WARC-IP-Address":"213.155.18.48","Content-Type":"application/http; msgtype=request"},"Payload-Metadata":{"Trailing-Slop-Length":"4","HTTP-Request-Metadata":{"Headers":{"Host":"08.od.ua","Accept-Encoding":"x-gzip, gzip, deflate","User-Agent":"CCBot/2.0 
(http://commoncrawl.org/faq/)","Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},"Headers-Length":"242","Entity-Length":"0","Entity-Trailing-Slop-Bytes":"0","Request-Message":{"Method":"GET","Version":"HTTP/1.0","Path":"/stroymaterialy/pilomaterialy/les_ooo_lis_ooo"},"Entity-Digest":"sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ"},"Actual-Content-Type":"application/http; msgtype=request"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"412","Header-Length":"10","Inflated-CRC":"-2049167030","Inflated-Length":"626"},"Offset":"17014","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 134 | 135 | WARC/1.0 136 | WARC-Type: metadata 137 | WARC-Target-URI: http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo 138 | WARC-Date: 2016-12-11T13:54:37Z 139 | WARC-Record-ID: 140 | WARC-Refers-To: 141 | Content-Type: application/json 142 | Content-Length: 8629 143 | 144 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"593","Block-Digest":"sha1:YQICPN5F5IALIV3X2ND4LOU5TIHAMZPJ","Actual-Content-Length":"16042","WARC-Header-Metadata":{"WARC-Type":"response","WARC-Truncated":"length","WARC-Date":"2016-12-11T13:54:37Z","WARC-Warcinfo-ID":"","Content-Length":"16042","WARC-Record-ID":"","WARC-Block-Digest":"sha1:YQICPN5F5IALIV3X2ND4LOU5TIHAMZPJ","WARC-Payload-Digest":"sha1:FMPEPMTWKD3YKRF76EUODBKHCY4VAABP","WARC-Target-URI":"http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo","WARC-IP-Address":"213.155.18.48","WARC-Concurrent-To":"","Content-Type":"application/http; msgtype=response"},"Payload-Metadata":{"Trailing-Slop-Length":"4","Actual-Content-Type":"application/http; msgtype=response","HTTP-Response-Metadata":{"Headers":{"Date":"Sun, 11 Dec 2016 13:07:50 GMT","Vary":"Accept-Encoding","Set-Cookie":"design=default; expires=Mon, 11-Dec-2017 13:07:50 GMT; path=/","Content-Type":"text/html; 
charset=UTF-8","Connection":"close","Server":"nginx/1.4.4"},"Headers-Length":"234","Entity-Length":"15808","Entity-Trailing-Slop-Bytes":"0","Response-Message":{"Status":"200","Version":"HTTP/1.1","Reason":"OK"},"HTML-Metadata":{"Links":[{"text":"Справочная Одессы","title":"Справочная Одессы","path":"A@/href","url":"/"},{"path":"FORM@/action","url":"/poisk/"},{"alt":"","path":"IMG@/src","url":"/design/default/images/loader.gif"},{"text":"О фирме","title":"Лес, ооо (лис, ооо) в Одессе","path":"A@/href","url":"/les_ooo_lis_ooo/main"},{"text":"Каталог продукции","title":"Лес, ооо (лис, ооо) - Каталог продукции в Одессе","path":"A@/href","url":"/les_ooo_lis_ooo"},{"text":"Прайс-листы","title":"Цены в Лес, ооо (лис, ооо) - Предприятия Одессы","path":"A@/href","url":"/les_ooo_lis_ooo/price/93558"},{"text":"Контакты","title":"Карта Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/contacts"},{"text":"Главная","title":"Сайт г. Одесса","path":"A@/href","url":"/"},{"text":"Стройматериалы","title":"Стройматериалы в Одессе","path":"A@/href","url":"/stroymaterialy"},{"text":"Пиломатериалы","title":"Пиломатериалы Одесса","path":"A@/href","url":"/stroymaterialy/pilomaterialy"},{"text":"Лиственница (9)","title":"Лиственница - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/93558"},{"text":"Пиломатериалы сухие (7)","title":"Пиломатериалы сухие - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/93557"},{"text":"Доска необрезная (8)","title":"Доска необрезная - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/93556"},{"text":"Брус (3)","title":"Брус - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/93555"},{"text":"Доска обрезная (12)","title":"Доска обрезная - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/93554"},{"text":"Антисептики, пропитки, средства для древесины (0)","title":" - Каталог товаров 
Одесса","path":"A@/href","url":"/otdelochnye_materialy/antiseptiki_propitki_sredstva_dlya_drevesiny/les_ooo_lis_ooo"},{"text":"Пиломатериалы (1)","title":" - Каталог товаров Одесса","path":"A@/href","url":"/stroymaterialy/pilomaterialy/les_ooo_lis_ooo"},{"text":"Металлические крепления (4)","title":"Металлические крепления - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/95254"},{"text":"Рейка (1)","title":"Рейка - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/97250"},{"text":"Вагонка (5)","title":"Вагонка - Каталог товаров Одесса","path":"A@/href","url":"/les_ooo_lis_ooo/price/98839"},{"text":"Опилки, стружка","title":"Опилки, стружка купить в Одессе","path":"A@/href","url":"/les_ooo_lis_ooo/tovar_opilki_struzhka"},{"alt":"Опилки, стружка","path":"IMG@/src","url":"/cache/f/d/8/8/2c2f3c849bdd6b13c48d2f04356e.jpg"},{"path":"A@/href","url":"/les_ooo_lis_ooo/tovar_opilki_struzhka"},{"alt":"","path":"IMG@/src","url":"http://08.od.ua/data/price/lis/222.jpg"},{"text":"О фирме","title":"Лес, ооо (лис, ооо) в Одессе","path":"A@/href","url":"/les_ooo_lis_ooo/main"},{"text":"Каталог продукции","title":"Лес, ооо (лис, ооо) - Каталог продукции в Одессе","path":"A@/href","url":"/les_ooo_lis_ooo"},{"text":"Прайс-листы","title":"Цены в Лес, ооо (лис, ооо) - Предприятия Одессы","path":"A@/href","url":"/les_ooo_lis_ooo/price/93558"},{"text":"Контакты","title":"Одесса карта проезда","path":"A@/href","url":"/les_ooo_lis_ooo/contacts"},{"text":"http://08.od.ua","title":"Одесса сайт","path":"A@/href","url":"/"},{"alt":"bigmir)net TOP 100","path":"IMG@/src","url":"http://c.bigmir.net/?v16840041&s16839405&t21"},{"alt":"Rambler's 
Top100","path":"IMG@/src","url":"/design/default/images/rambler.gif"},{"alt":"LiveInternet","path":"IMG@/src","url":"http://counter.yadro.ru/hit?t44.6;uhttp%3A%2F%2F08.od.ua%2Fstroymaterialy%2Fpilomaterialy%2Fles_ooo_lis_ooo"},{"alt":"hit.ua","path":"IMG@/src","url":"http://c.hit.ua/hit?i=107467&g=0&x=2&s=1&c=1&t=-120&r=&u=http%3A%2F%2F08.od.ua%2Fstroymaterialy%2Fpilomaterialy%2Fles_ooo_lis_ooo"},{"alt":"","path":"IMG@/src","url":"http://counter.rambler.ru/top100.scn?1919603"},{"alt":"","path":"IMG@/src","url":"//mc.yandex.ru/watch/19895821"},{"path":"A@/href","url":"https://plus.google.com/108905381149799728781"}],"Head":{"Link":[{"path":"LINK@/href","rel":"icon","type":"image/x-icon","url":"/design/default/images/spravochnaya.ico"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/coners.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/buttons.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/styles.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/lytebox.css"}],"Scripts":[{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/lytebox.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"https://apis.google.com/js/plusone.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/08.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-1.4.2.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.formtips.1.2.2.packed.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-ui-1.7.2.custom.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.equalHeight.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/start.js"},{"path":"SCRIPT@/src"
,"type":"text/javascript","url":"http://www.google-analytics.com/ga.js"}],"Metas":[{"content":"text/html; charset=UTF-8","http-equiv":"Content-Type"},{"content":"ru-RU","http-equiv":"Content-Language"},{"content":"Лес, ооо (лис, ооо) в Одессе, Стройматериалы - Пиломатериалы","name":"description"},{"content":"Лес, ооо (лис, ооо), Одесса, Стройматериалы - Пиломатериалы","name":"keywords"},{"content":"initial-scale=1.0, user-scalable=no","name":"viewport"}],"Title":"Лес, ооо (лис, ооо) в Одессе, Стройматериалы - Пиломатериалы"}},"Entity-Digest":"sha1:FMPEPMTWKD3YKRF76EUODBKHCY4VAABP"}}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"4999","Header-Length":"10","Inflated-CRC":"-1745377820","Inflated-Length":"16639"},"Offset":"17426","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 145 | 146 | WARC/1.0 147 | WARC-Type: metadata 148 | WARC-Target-URI: http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo 149 | WARC-Date: 2016-12-11T13:54:37Z 150 | WARC-Record-ID: 151 | WARC-Refers-To: 152 | Content-Type: application/json 153 | Content-Length: 1079 154 | 155 | 
{"Envelope":{"Format":"WARC","WARC-Header-Length":"405","Block-Digest":"sha1:4AWTITAEHX3AEABU3CR6V3GKK7I6NMPD","Actual-Content-Length":"20","WARC-Header-Metadata":{"WARC-Type":"metadata","WARC-Date":"2016-12-11T13:54:37Z","WARC-Warcinfo-ID":"","Content-Length":"20","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/stroymaterialy/pilomaterialy/les_ooo_lis_ooo","WARC-Concurrent-To":"","Content-Type":"application/warc-fields"},"Payload-Metadata":{"Trailing-Slop-Length":"4","WARC-Metadata-Metadata":{"Trailing-Slop-Length":"0","Metadata-Records":[{"Name":"fetchTimeMs","Value":"516"}],"Actual-Content-Length":"20"},"Actual-Content-Type":"application/metadata-fields"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"310","Header-Length":"10","Inflated-CRC":"-1674268030","Inflated-Length":"429"},"Offset":"22425","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 156 | 157 | WARC/1.0 158 | WARC-Type: metadata 159 | WARC-Target-URI: http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts 160 | WARC-Date: 2016-12-11T14:08:36Z 161 | WARC-Record-ID: 162 | WARC-Refers-To: 163 | Content-Type: application/json 164 | Content-Length: 1383 165 | 166 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"375","Block-Digest":"sha1:SAK3OJMAHRIV7ABDKGD6INWRJXHUXSA4","Actual-Content-Length":"241","WARC-Header-Metadata":{"WARC-Type":"request","WARC-Date":"2016-12-11T14:08:36Z","WARC-Warcinfo-ID":"","Content-Length":"241","WARC-Record-ID":"","WARC-Target-URI":"http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts","WARC-IP-Address":"213.155.18.48","Content-Type":"application/http; msgtype=request"},"Payload-Metadata":{"Trailing-Slop-Length":"4","HTTP-Request-Metadata":{"Headers":{"Host":"08.od.ua","Accept-Encoding":"x-gzip, gzip, deflate","User-Agent":"CCBot/2.0 
(http://commoncrawl.org/faq/)","Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},"Headers-Length":"239","Entity-Length":"0","Entity-Trailing-Slop-Bytes":"0","Request-Message":{"Method":"GET","Version":"HTTP/1.0","Path":"/svoboda_vseukrainskoe_obedinenie/contacts"},"Entity-Digest":"sha1:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ"},"Actual-Content-Type":"application/http; msgtype=request"}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"416","Header-Length":"10","Inflated-CRC":"-1566223156","Inflated-Length":"620"},"Offset":"22735","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 167 | 168 | WARC/1.0 169 | WARC-Type: metadata 170 | WARC-Target-URI: http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts 171 | WARC-Date: 2016-12-11T14:08:36Z 172 | WARC-Record-ID: 173 | WARC-Refers-To: 174 | Content-Type: application/json 175 | Content-Length: 6318 176 | 177 | {"Envelope":{"Format":"WARC","WARC-Header-Length":"590","Block-Digest":"sha1:Q6EEMLUSOMRC77UBE34TB6TRDT6MGQD4","Actual-Content-Length":"11576","WARC-Header-Metadata":{"WARC-Type":"response","WARC-Truncated":"length","WARC-Date":"2016-12-11T14:08:36Z","WARC-Warcinfo-ID":"","Content-Length":"11576","WARC-Record-ID":"","WARC-Block-Digest":"sha1:Q6EEMLUSOMRC77UBE34TB6TRDT6MGQD4","WARC-Payload-Digest":"sha1:OJQLP3IBNRTJTF3U4HEZQ24ETE2DZMCQ","WARC-Target-URI":"http://08.od.ua/svoboda_vseukrainskoe_obedinenie/contacts","WARC-IP-Address":"213.155.18.48","WARC-Concurrent-To":"","Content-Type":"application/http; msgtype=response"},"Payload-Metadata":{"Trailing-Slop-Length":"4","Actual-Content-Type":"application/http; msgtype=response","HTTP-Response-Metadata":{"Headers":{"Date":"Sun, 11 Dec 2016 13:21:49 GMT","Vary":"Accept-Encoding","Set-Cookie":"design=default; expires=Mon, 11-Dec-2017 13:21:49 GMT; path=/","Content-Type":"text/html; 
charset=UTF-8","Connection":"close","Server":"nginx/1.4.4"},"Headers-Length":"234","Entity-Length":"11342","Entity-Trailing-Slop-Bytes":"0","Response-Message":{"Status":"200","Version":"HTTP/1.1","Reason":"OK"},"HTML-Metadata":{"Links":[{"text":"Справочная Одессы","title":"Справочная Одессы","path":"A@/href","url":"/"},{"path":"FORM@/action","url":"/poisk/"},{"alt":"","path":"IMG@/src","url":"/design/default/images/loader.gif"},{"text":"О фирме","title":"Свобода, всеукраинское объединение в Одессе","path":"A@/href","url":"/svoboda_vseukrainskoe_obedinenie/main"},{"text":"Каталог продукции","title":"Свобода, всеукраинское объединение - Каталог продукции в Одессе","path":"A@/href","url":"/svoboda_vseukrainskoe_obedinenie"},{"text":"Прайс-листы","title":"Цены в Свобода, всеукраинское объединение - Предприятия Одессы","path":"A@/href","url":"/gorodskie_sluzhby/partii/svoboda_vseukrainskoe_obedinenie"},{"text":"Контакты","title":"Карта Одесса","path":"A@/href","url":"/svoboda_vseukrainskoe_obedinenie/contacts"},{"text":"О фирме","title":"Свобода, всеукраинское объединение в Одессе","path":"A@/href","url":"/svoboda_vseukrainskoe_obedinenie/main"},{"text":"Каталог продукции","title":"Свобода, всеукраинское объединение - Каталог продукции в Одессе","path":"A@/href","url":"/svoboda_vseukrainskoe_obedinenie"},{"text":"Прайс-листы","title":"Цены в Свобода, всеукраинское объединение - Предприятия Одессы","path":"A@/href","url":"/gorodskie_sluzhby/partii/svoboda_vseukrainskoe_obedinenie"},{"text":"Контакты","title":"Одесса карта проезда","path":"A@/href","url":"/svoboda_vseukrainskoe_obedinenie/contacts"},{"text":"http://08.od.ua","title":"Одесса сайт","path":"A@/href","url":"/"},{"alt":"bigmir)net TOP 100","path":"IMG@/src","url":"http://c.bigmir.net/?v16840041&s16839405&t21"},{"alt":"Rambler's 
Top100","path":"IMG@/src","url":"/design/default/images/rambler.gif"},{"alt":"LiveInternet","path":"IMG@/src","url":"http://counter.yadro.ru/hit?t44.6;uhttp%3A%2F%2F08.od.ua%2Fsvoboda_vseukrainskoe_obedinenie%2Fcontacts"},{"alt":"hit.ua","path":"IMG@/src","url":"http://c.hit.ua/hit?i=107467&g=0&x=2&s=1&c=1&t=-120&r=&u=http%3A%2F%2F08.od.ua%2Fsvoboda_vseukrainskoe_obedinenie%2Fcontacts"},{"alt":"","path":"IMG@/src","url":"http://counter.rambler.ru/top100.scn?1919603"},{"alt":"","path":"IMG@/src","url":"//mc.yandex.ru/watch/19895821"},{"path":"A@/href","url":"https://plus.google.com/108905381149799728781"}],"Head":{"Link":[{"path":"LINK@/href","rel":"icon","type":"image/x-icon","url":"/design/default/images/spravochnaya.ico"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/coners.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/buttons.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/styles.css"},{"path":"LINK@/href","rel":"stylesheet","type":"text/css","url":"/design/default/css/restyle/lytebox.css"}],"Scripts":[{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/lytebox.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"https://apis.google.com/js/plusone.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/08.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-1.4.2.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.formtips.1.2.2.packed.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery-ui-1.7.2.custom.min.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/jquery.equalHeight.js"},{"path":"SCRIPT@/src","type":"text/javascript","url":"/design/default/js/restyle/start.js"},{"path":"SCRIPT@/src","type":"t
ext/javascript","url":"http://www.google-analytics.com/ga.js"}],"Metas":[{"content":"text/html; charset=UTF-8","http-equiv":"Content-Type"},{"content":"ru-RU","http-equiv":"Content-Language"},{"content":"Свобода, всеукраинское объединение в Одессе - Контакты","name":"description"},{"content":"Свобода, всеукраинское объединение, Одесса","name":"keywords"},{"content":"initial-scale=1.0, user-scalable=no","name":"viewport"}],"Title":"Свобода, всеукраинское объединение в Одессе - Контакты"}},"Entity-Digest":"sha1:OJQLP3IBNRTJTF3U4HEZQ24ETE2DZMCQ"}}},"Container":{"Compressed":true,"Gzip-Metadata":{"Footer-Length":"8","Deflate-Length":"4038","Header-Length":"10","Inflated-CRC":"-693908111","Inflated-Length":"12170"},"Offset":"23151","Filename":"CC-MAIN-20161202170904-00511-ip-10-31-129-80.ec2.internal.warc.gz"}} 178 | -------------------------------------------------------------------------------- /inst/samples/sample.wat.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-spark/sparkwarc/c3e8975ad7fb49bde159d33afc5a72d09e220b2b/inst/samples/sample.wat.gz -------------------------------------------------------------------------------- /inst/samples/sample.wet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-spark/sparkwarc/c3e8975ad7fb49bde159d33afc5a72d09e220b2b/inst/samples/sample.wet -------------------------------------------------------------------------------- /inst/samples/sample.wet.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-spark/sparkwarc/c3e8975ad7fb49bde159d33afc5a72d09e220b2b/inst/samples/sample.wet.gz -------------------------------------------------------------------------------- /java/SparkWARC.scala: -------------------------------------------------------------------------------- 1 | package SparkWARC 2 | 3 | import 
org.apache.spark.SparkContext 4 | import org.apache.spark.rdd.RDD 5 | import org.apache.spark.sql._ 6 | import scala.util.matching._ 7 | import org.apache.spark.sql.types._ 8 | 
/** Line-oriented WARC reader that produces a Spark DataFrame. */
object WARC {
  /**
   * Reads the text file(s) at `path` and returns one row per retained line.
   *
   * @param sc           Spark context used to read the input.
   * @param path         location handed to `SparkContext.textFile`.
   * @param matchLine    substring (matched literally via `String.contains`)
   *                     that a line must contain to be kept; an empty string
   *                     keeps every line.
   * @param repartitions number of partitions to redistribute the lines into;
   *                     values <= 0 keep the partitioning of the input RDD.
   * @return a DataFrame with two nullable columns: `tags` (number of
   *         `<[^>]*>` matches found in the line) and `content` (the line).
   */
  def parse(sc: SparkContext, path: String, matchLine: String, repartitions: Int) : DataFrame = {
    val sqlContext = new SQLContext(sc)

    val lines = sc.textFile(path)
    val distributed = if (repartitions > 0) lines.repartition(repartitions) else lines

    // `contains("")` is true for every string, so an empty matchLine would
    // only add a no-op filter stage; skip it in that case.
    val selected =
      if (matchLine.isEmpty) distributed
      else distributed.filter(line => line.contains(matchLine))

    // Compile the pattern once and let the closure capture it, instead of
    // rebuilding the Regex for every single input line.
    val tagPattern = "<[^>]*>".r

    val warcParsed = selected.map(line => Row(tagPattern.findAllIn(line).length, line))

    val warcStruct = StructType(
      StructField("tags", IntegerType, true) ::
      StructField("content", StringType, true) :: Nil
    )

    sqlContext.createDataFrame(warcParsed, warcStruct)
  }
}
-------------------------------------------------------------------------------- /man/cc_warc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/commoncrawl.R 3 | \name{cc_warc} 4 | \alias{cc_warc} 5 | \title{Provides WARC paths for commoncrawl.org} 6 | \usage{ 7 | cc_warc(start, end = start) 8 | } 9 | \arguments{ 10 | \item{start}{The first path to retrieve.} 11 | 12 | \item{end}{The last path to retrieve.} 13 | } 14 | \description{ 15 | Provides WARC paths for commoncrawl.org. To be used with 16 | \code{spark_read_warc}. 
17 | } 18 | \examples{ 19 | 20 | cc_warc(1) 21 | cc_warc(2, 3) 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/rcpp_read_warc_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample.R 3 | \name{rcpp_read_warc_sample} 4 | \alias{rcpp_read_warc_sample} 5 | \title{Loads the sample warc file in Rcpp} 6 | \usage{ 7 | rcpp_read_warc_sample(filter = "", include = "") 8 | } 9 | \arguments{ 10 | \item{filter}{A regular expression used to filter each warc entry 11 | efficiently by running native code using \code{Rcpp}.} 12 | 13 | \item{include}{A regular expression used to keep only matching lines 14 | efficiently by running native code using \code{Rcpp}.} 15 | } 16 | \description{ 17 | Loads the sample warc file in Rcpp 18 | } 19 | -------------------------------------------------------------------------------- /man/spark_rcpp_read_warc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sparkwarc.R 3 | \name{spark_rcpp_read_warc} 4 | \alias{spark_rcpp_read_warc} 5 | \title{Reads a WARC File using Rcpp} 6 | \usage{ 7 | spark_rcpp_read_warc(path, match_warc, match_line) 8 | } 9 | \arguments{ 10 | \item{path}{The path to the file. Needs to be accessible from the cluster. 11 | Supports the \samp{"hdfs://"}, \samp{"s3n://"} and \samp{"file://"} protocols.} 12 | 13 | \item{match_warc}{include only warc files matching this character string.} 14 | 15 | \item{match_line}{include only lines matching this character string.} 16 | } 17 | \description{ 18 | Reads a WARC (Web ARChive) file using Rcpp. 
19 | } 20 | -------------------------------------------------------------------------------- /man/spark_read_warc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sparkwarc.R 3 | \name{spark_read_warc} 4 | \alias{spark_read_warc} 5 | \title{Reads a WARC File into Apache Spark} 6 | \usage{ 7 | spark_read_warc( 8 | sc, 9 | name, 10 | path, 11 | repartition = 0L, 12 | memory = TRUE, 13 | overwrite = TRUE, 14 | match_warc = "", 15 | match_line = "", 16 | parser = c("r", "scala"), 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{sc}{An active \code{spark_connection}.} 22 | 23 | \item{name}{The name to assign to the newly generated table.} 24 | 25 | \item{path}{The path to the file. Needs to be accessible from the cluster. 26 | Supports the \samp{"hdfs://"}, \samp{"s3n://"} and \samp{"file://"} protocols.} 27 | 28 | \item{repartition}{The number of partitions used to distribute the 29 | generated table. Use 0 (the default) to avoid partitioning.} 30 | 31 | \item{memory}{Boolean; should the data be loaded eagerly into memory? (That 32 | is, should the table be cached?)} 33 | 34 | \item{overwrite}{Boolean; overwrite the table with the given name if it 35 | already exists?} 36 | 37 | \item{match_warc}{include only warc files matching this character string.} 38 | 39 | \item{match_line}{include only lines matching this character string.} 40 | 41 | \item{parser}{which parser implementation to use? Options are "scala" 42 | or "r" (default).} 43 | 44 | \item{...}{Additional arguments reserved for future use.} 45 | } 46 | \description{ 47 | Reads a WARC (Web ARChive) file into Apache Spark using sparklyr. 
48 | } 49 | \examples{ 50 | 51 | \dontrun{ 52 | library(sparklyr) 53 | library(sparkwarc) 54 | sc <- spark_connect(master = "local") 55 | sdf <- spark_read_warc( 56 | sc, 57 | name = "sample_warc", 58 | path = system.file(file.path("samples", "sample.warc"), package = "sparkwarc"), 59 | memory = FALSE, 60 | overwrite = FALSE 61 | ) 62 | 63 | spark_disconnect(sc) 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /man/spark_read_warc_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample.R 3 | \name{spark_read_warc_sample} 4 | \alias{spark_read_warc_sample} 5 | \title{Loads the sample warc file in Spark} 6 | \usage{ 7 | spark_read_warc_sample(sc, filter = "", include = "") 8 | } 9 | \arguments{ 10 | \item{sc}{An active \code{spark_connection}.} 11 | 12 | \item{filter}{A regular expression used to filter each warc entry 13 | efficiently by running native code using \code{Rcpp}.} 14 | 15 | \item{include}{A regular expression used to keep only matching lines 16 | efficiently by running native code using \code{Rcpp}.} 17 | } 18 | \description{ 19 | Loads the sample warc file in Spark 20 | } 21 | -------------------------------------------------------------------------------- /man/spark_warc_sample_path.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample.R 3 | \name{spark_warc_sample_path} 4 | \alias{spark_warc_sample_path} 5 | \title{Retrieves sample warc path} 6 | \usage{ 7 | spark_warc_sample_path() 8 | } 9 | \description{ 10 | Retrieves sample warc path 11 | } 12 | -------------------------------------------------------------------------------- /man/sparkwarc.Rd: -------------------------------------------------------------------------------- 1 | % Generated 
by roxygen2: do not edit by hand 2 | % Please edit documentation in R/package.R 3 | \docType{package} 4 | \name{sparkwarc} 5 | \alias{sparkwarc} 6 | \title{sparkwarc} 7 | \description{ 8 | Sparklyr extension for loading WARC Files into Apache Spark 9 | } 10 | -------------------------------------------------------------------------------- /sparkwarc.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS=-lz 2 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // rcpp_read_warc 9 | DataFrame rcpp_read_warc(std::string const& path, std::string const& filter, std::string const& include); 10 | RcppExport SEXP _sparkwarc_rcpp_read_warc(SEXP pathSEXP, SEXP filterSEXP, SEXP includeSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< std::string const& >::type path(pathSEXP); 15 | Rcpp::traits::input_parameter< std::string const& >::type filter(filterSEXP); 16 | 
Rcpp::traits::input_parameter< std::string const& >::type include(includeSEXP); 17 | rcpp_result_gen = Rcpp::wrap(rcpp_read_warc(path, filter, include)); 18 | return rcpp_result_gen; 19 | END_RCPP 20 | } 21 | 22 | static const R_CallMethodDef CallEntries[] = { 23 | {"_sparkwarc_rcpp_read_warc", (DL_FUNC) &_sparkwarc_rcpp_read_warc, 3}, 24 | {NULL, NULL, 0} 25 | }; 26 | 27 | RcppExport void R_init_sparkwarc(DllInfo *dll) { 28 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 29 | R_useDynamicSymbols(dll, FALSE); 30 | } 31 | -------------------------------------------------------------------------------- /src/warc.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include 5 | #include 6 | 7 | std::size_t rcpp_find_tag(std::string const &line, std::size_t pos) { 8 | auto const tag_start = line.find("<", pos); 9 | if (tag_start != std::string::npos && 10 | line.find(">", tag_start + 1) != std::string::npos) { 11 | return tag_start; 12 | } 13 | 14 | return std::string::npos; 15 | } 16 | 17 | constexpr std::size_t kBufSz = 4 * 1024; 18 | constexpr std::size_t kAvgWarcSz = 40 * 1024; 19 | std::string const kWarcSep = "WARC/1.0"; 20 | 21 | // [[Rcpp::export]] 22 | DataFrame rcpp_read_warc(std::string const &path, std::string const &filter, 23 | std::string const &include) { 24 | 25 | FILE *fp = fopen(path.c_str(), "rb"); 26 | if (!fp) 27 | Rcpp::stop("Failed to open WARC file."); 28 | 29 | gzFile gzf = gzdopen(fileno(fp), "rb"); 30 | if (!gzf) 31 | Rcpp::stop("Failed to open WARC as a compressed file."); 32 | 33 | char buf[kBufSz] = {'\0'}; 34 | 35 | std::list warc_entries; 36 | 37 | std::string warc_entry; 38 | warc_entry.reserve(kAvgWarcSz); 39 | 40 | bool one_matched = false; 41 | 42 | long stats_tags_total = 0; 43 | std::list warc_stats; 44 | 45 | while (gzgets(gzf, buf, kBufSz) != Z_NULL) { 46 | std::string line(buf); 47 | 48 | if (!filter.empty() && !one_matched) { 49 | 
one_matched = line.find(filter) != std::string::npos; 50 | } 51 | 52 | if (line.substr(0, kWarcSep.size()) == kWarcSep && warc_entry.size() > 0) { 53 | if (filter.empty() || one_matched) { 54 | warc_entries.emplace_back(std::move(warc_entry)); 55 | warc_stats.push_back(stats_tags_total); 56 | stats_tags_total = 0; 57 | } 58 | 59 | one_matched = false; 60 | 61 | warc_entry.clear(); 62 | } 63 | 64 | auto tag_start = rcpp_find_tag(line, 0); 65 | while (tag_start != std::string::npos) { 66 | stats_tags_total += 1; 67 | tag_start = rcpp_find_tag(line, tag_start + 1); 68 | } 69 | 70 | if (include.empty() || line.find(include) != std::string::npos) { 71 | warc_entry.append(std::move(line)); 72 | } 73 | } 74 | 75 | if (gzf) 76 | gzclose(gzf); 77 | if (fp) 78 | fclose(fp); 79 | 80 | std::size_t idxEntry = 0; 81 | CharacterVector results(warc_entries.size()); 82 | std::for_each(std::make_move_iterator(warc_entries.begin()), 83 | std::make_move_iterator(warc_entries.end()), 84 | [&results, &idxEntry](std::string &&entry) { 85 | results[idxEntry++] = std::move(entry); 86 | }); 87 | 88 | std::size_t idxStat = 0; 89 | NumericVector stats(warc_stats.size()); 90 | std::for_each(warc_stats.begin(), warc_stats.end(), 91 | [&stats, &idxStat](long &stat) { stats[idxStat++] = stat; }); 92 | 93 | return DataFrame::create(Named("tags") = std::move(stats), 94 | _["content"] = std::move(results)); 95 | } 96 | --------------------------------------------------------------------------------