├── .gitignore ├── .travis.yml ├── CHANGES.rst ├── README.rst ├── codecov.yml ├── docs ├── Makefile ├── changes.rst ├── conf.py ├── images │ ├── wa-buttons.png │ └── wa-options.png ├── index.rst ├── intro.rst ├── make.bat ├── ref │ ├── base.rst │ ├── crfsuite.rst │ ├── features.rst │ ├── grouping.rst │ ├── index.rst │ ├── loaders.rst │ ├── metrics.rst │ ├── misc.rst │ ├── model.rst │ ├── wapiti.rst │ └── webannotator.rst └── tutorial.rst ├── example ├── README.rst ├── ner │ ├── __init__.py │ ├── build_gazetteers.py │ ├── cv.py │ ├── data.py │ ├── train.py │ └── utils.py └── requirements.txt ├── ideas.rst ├── requirements-dev.txt ├── requirements-doc.txt ├── requirements.txt ├── setup.cfg ├── setup.py ├── tox.ini ├── webstruct ├── __init__.py ├── _fileresource.py ├── annotation_converter.py ├── annotation_verifier.py ├── base.py ├── crfsuite.py ├── feature_extraction.py ├── features │ ├── __init__.py │ ├── block_features.py │ ├── data_features.py │ ├── datetime_format.py │ ├── global_features.py │ └── token_features.py ├── gazetteers │ ├── __init__.py │ ├── features.py │ └── geonames.py ├── grouping.py ├── html_tokenizer.py ├── html_tokenizer_benchmark.py ├── infer_domain.py ├── loaders.py ├── metrics.py ├── model.py ├── model_benchmark.py ├── sequence_encoding.py ├── tests │ ├── __init__.py │ ├── data │ │ ├── wa1.html │ │ └── wa2.html │ ├── test_crfsuite.py │ ├── test_html_tokenizer.py │ ├── test_html_tools.py │ ├── test_infer_domain.py │ ├── test_loaders.py │ ├── test_pattern_features.py │ ├── test_text_tokenizer.py │ ├── test_utils.py │ ├── test_wapiti.py │ ├── test_webannotator.py │ └── utils.py ├── text_tokenizers.py ├── utils.py ├── wapiti.py └── webannotator.py └── webstruct_data ├── README.rst ├── clean_html.py ├── corpus ├── business_pages │ ├── README.rst │ ├── source │ │ ├── 0.html │ │ ├── 1.html │ │ ├── 10.html │ │ ├── 100.html │ │ ├── 101.html │ │ ├── 102.html │ │ ├── 103.html │ │ ├── 104.html │ │ ├── 105.html │ │ ├── 106.html │ │ ├── 107.html │ │ ├── 108.html │ │ ├── 109.html │ │ ├── 11.html │ │ ├── 110.html │ │ ├── 111.html │ │ ├── 112.html │ │ ├── 113.html │ │ ├── 114.html │ │ ├── 115.html │ │ ├── 116.html │ │ ├── 117.html │ │ ├── 118.html │ │ ├── 119.html │ │ ├── 12.html │ │ ├── 120.html │ │ ├── 121.html │ │ ├── 122.html │ │ ├── 123.html │ │ ├── 124.html │ │ ├── 125.html │ │ ├── 126.html │ │ ├── 127.html │ │ ├── 128.html │ │ ├── 129.html │ │ ├── 13.html │ │ ├── 130.html │ │ ├── 131.html │ │ ├── 132.html │ │ ├── 133.html │ │ ├── 134.html │ │ ├── 135.html │ │ ├── 136.html │ │ ├── 137.html │ │ ├── 138.html │ │ ├── 139.html │ │ ├── 14.html │ │ ├── 140.html │ │ ├── 141.html │ │ ├── 142.html │ │ ├── 143.html │ │ ├── 144.html │ │ ├── 145.html │ │ ├── 146.html │ │ ├── 147.html │ │ ├── 148.html │ │ ├── 149.html │ │ ├── 15.html │ │ ├── 150.html │ │ ├── 151.html │ │ ├── 152.html │ │ ├── 153.html │ │ ├── 154.html │ │ ├── 155.html │ │ ├── 156.html │ │ ├── 157.html │ │ ├── 158.html │ │ ├── 159.html │ │ ├── 16.html │ │ ├── 160.html │ │ ├── 161.html │ │ ├── 162.html │ │ ├── 163.html │ │ ├── 164.html │ │ ├── 165.html │ │ ├── 166.html │ │ ├── 167.html │ │ ├── 168.html │ │ ├── 169.html │ │ ├── 17.html │ │ ├── 170.html │ │ ├── 171.html │ │ ├── 172.html │ │ ├── 173.html │ │ ├── 174.html │ │ ├── 175.html │ │ ├── 176.html │ │ ├── 177.html │ │ ├── 178.html │ │ ├── 179.html │ │ ├── 18.html │ │ ├── 180.html │ │ ├── 181.html │ │ ├── 182.html │ │ ├── 183.html │ │ ├── 184.html │ │ ├── 185.html │ │ ├── 186.html │ │ ├── 187.html │ │ ├── 188.html │ │ ├── 189.html │ │ ├── 19.html │ │ ├── 190.html │ │ ├── 191.html │ │ ├── 192.html │ │ ├── 193.html │ │ ├── 194.html │ │ ├── 195.html │ │ ├── 196.html │ │ ├── 197.html │ │ ├── 198.html │ │ ├── 199.html │ │ ├── 2.html │ │ ├── 20.html │ │ ├── 200.html │ │ ├── 201.html │ │ ├── 202.html │ │ ├── 203.html │ │ ├── 204.html │ │ ├── 205.html │ │ ├── 206.html │ │ ├── 207.html │ │ ├── 208.html │ │ ├── 209.html │ │ ├── 21.html │ │ ├── 210.html │ │ ├── 211.html │ │ ├── 212.html │ │ ├── 213.html │ │ ├── 214.html │ │ ├── 215.html │ │ ├── 216.html │ │ ├── 217.html │ │ ├── 218.html │ │ ├── 219.html │ │ ├── 22.html │ │ ├── 220.html │ │ ├── 221.html │ │ ├── 222.html │ │ ├── 223.html │ │ ├── 224.html │ │ ├── 225.html │ │ ├── 226.html │ │ ├── 227.html │ │ ├── 228.html │ │ ├── 229.html │ │ ├── 23.html │ │ ├── 230.html │ │ ├── 231.html │ │ ├── 232.html │ │ ├── 233.html │ │ ├── 234.html │ │ ├── 235.html │ │ ├── 236.html │ │ ├── 237.html │ │ ├── 238.html │ │ ├── 239.html │ │ ├── 24.html │ │ ├── 240.html │ │ ├── 241.html │ │ ├── 242.html │ │ ├── 243.html │ │ ├── 244.html │ │ ├── 245.html │ │ ├── 246.html │ │ ├── 247.html │ │ ├── 248.html │ │ ├── 249.html │ │ ├── 25.html │ │ ├── 250.html │ │ ├── 251.html │ │ ├── 252.html │ │ ├── 253.html │ │ ├── 254.html │ │ ├── 255.html │ │ ├── 256.html │ │ ├── 257.html │ │ ├── 258.html │ │ ├── 259.html │ │ ├── 26.html │ │ ├── 260.html │ │ ├── 261.html │ │ ├── 262.html │ │ ├── 263.html │ │ ├── 264.html │ │ ├── 265.html │ │ ├── 266.html │ │ ├── 267.html │ │ ├── 268.html │ │ ├── 269.html │ │ ├── 27.html │ │ ├── 270.html │ │ ├── 271.html │ │ ├── 272.html │ │ ├── 273.html │ │ ├── 274.html │ │ ├── 275.html │ │ ├── 276.html │ │ ├── 277.html │ │ ├── 278.html │ │ ├── 279.html │ │ ├── 28.html │ │ ├── 280.html │ │ ├── 281.html │ │ ├── 282.html │ │ ├── 283.html │ │ ├── 284.html │ │ ├── 285.html │ │ ├── 286.html │ │ ├── 287.html │ │ ├── 288.html │ │ ├── 289.html │ │ ├── 29.html │ │ ├── 290.html │ │ ├── 291.html │ │ ├── 292.html │ │ ├── 293.html │ │ ├── 294.html │ │ ├── 295.html │ │ ├── 296.html │ │ ├── 297.html │ │ ├── 298.html │ │ ├── 299.html │ │ ├── 3.html │ │ ├── 30.html │ │ ├── 300.html │ │ ├── 301.html │ │ ├── 302.html │ │ ├── 303.html │ │ ├── 304.html │ │ ├── 305.html │ │ ├── 306.html │ │ ├── 307.html │ │ ├── 308.html │ │ ├── 309.html │ │ ├── 31.html │ │ ├── 310.html │ │ ├── 311.html │ │ ├── 312.html │ │ ├── 313.html │ │ ├── 314.html │ │ ├── 315.html │ │ ├── 316.html │ │ ├── 317.html │ │ ├── 318.html │ │ ├── 319.html │ │ ├── 32.html │ │ ├── 320.html │ │ ├── 321.html │ │ ├── 322.html │ │ ├── 323.html │ │ ├── 324.html │ │ ├── 325.html │ │ ├── 326.html │ │ ├── 327.html │ │ ├── 328.html │ │ ├── 329.html │ │ ├── 33.html │ │ ├── 330.html │ │ ├── 331.html │ │ ├── 332.html │ │ ├── 333.html │ │ ├── 334.html │ │ ├── 335.html │ │ ├── 336.html │ │ ├── 337.html │ │ ├── 338.html │ │ ├── 339.html │ │ ├── 34.html │ │ ├── 340.html │ │ ├── 341.html │ │ ├── 342.html │ │ ├── 343.html │ │ ├── 344.html │ │ ├── 345.html │ │ ├── 346.html │ │ ├── 347.html │ │ ├── 348.html │ │ ├── 349.html │ │ ├── 35.html │ │ ├── 350.html │ │ ├── 351.html │ │ ├── 352.html │ │ ├── 353.html │ │ ├── 354.html │ │ ├── 355.html │ │ ├── 356.html │ │ ├── 357.html │ │ ├── 358.html │ │ ├── 359.html │ │ ├── 36.html │ │ ├── 360.html │ │ ├── 361.html │ │ ├── 362.html │ │ ├── 363.html │ │ ├── 364.html │ │ ├── 365.html │ │ ├── 366.html │ │ ├── 367.html │ │ ├── 368.html │ │ ├── 369.html │ │ ├── 37.html │ │ ├── 370.html │ │ ├── 371.html │ │ ├── 372.html │ │ ├── 373.html │ │ ├── 374.html │ │ ├── 375.html │ │ ├── 376.html │ │ ├── 377.html │ │ ├── 378.html │ │ ├── 379.html │ │ ├── 38.html │ │ ├── 380.html │ │ ├── 381.html │ │ ├── 382.html │ │ ├── 383.html │ │ ├── 384.html │ │ ├── 385.html │ │ ├── 386.html │ │ ├── 387.html │ │ ├── 388.html │ │ ├── 389.html │ │ ├── 39.html │ │ ├── 390.html │ │ ├── 391.html │ │ ├── 392.html │ │ ├── 393.html │ │ ├── 394.html │ │ ├── 395.html │ │ ├── 396.html │ │ ├── 397.html │ │ ├── 398.html │ │ ├── 399.html │ │ ├── 4.html │ │ ├── 40.html │ │ ├── 400.html │ │ ├── 401.html │ │ ├── 402.html │ │ ├── 403.html │ │ ├── 404.html │ │ ├── 405.html │ │ ├── 406.html │ │ ├── 407.html │ │ ├── 408.html │ │ ├── 409.html │ │ ├── 41.html │ │ ├── 410.html │ │ ├── 411.html │ │ ├── 413.html │ │ ├── 414.html │ │ ├── 415.html │ │ ├── 416.html │ │ ├── 417.html │ │ ├── 418.html │ │ ├── 419.html │ │ ├── 42.html │ │ ├── 420.html │ │ ├── 421.html │ │ ├── 422.html │ │ ├── 423.html │ │ ├── 424.html │ │ ├── 425.html │ │ ├── 426.html │ │ ├── 427.html │ │ ├── 428.html │ │ ├── 429.html │ │ ├── 43.html │ │ ├── 430.html │ │ ├── 431.html │ │ ├── 432.html │ │ ├── 433.html │ │ ├── 434.html │ │ ├── 435.html │ │ ├── 436.html │ │ ├── 437.html │ │ ├── 438.html │ │ ├── 439.html │ │ ├── 44.html │ │ ├── 440.html │ │ ├── 441.html │ │ ├── 442.html │ │ ├── 443.html │ │ ├── 444.html │ │ ├── 445.html │ │ ├── 446.html │ │ ├── 447.html │ │ ├── 448.html │ │ ├── 449.html │ │ ├── 45.html │ │ ├── 450.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 48.html │ │ ├── 49.html │ │ ├── 5.html │ │ ├── 50.html │ │ ├── 51.html │ │ ├── 52.html │ │ ├── 53.html │ │ ├── 54.html │ │ ├── 55.html │ │ ├── 56.html │ │ ├── 57.html │ │ ├── 58.html │ │ ├── 59.html │ │ ├── 6.html │ │ ├── 60.html │ │ ├── 61.html │ │ ├── 62.html │ │ ├── 63.html │ │ ├── 64.html │ │ ├── 65.html │ │ ├── 66.html │ │ ├── 67.html │ │ ├── 68.html │ │ ├── 69.html │ │ ├── 7.html │ │ ├── 70.html │ │ ├── 72.html │ │ ├── 73.html │ │ ├── 74.html │ │ ├── 75.html │ │ ├── 76.html │ │ ├── 77.html │ │ ├── 78.html │ │ ├── 79.html │ │ ├── 8.html │ │ ├── 80.html │ │ ├── 81.html │ │ ├── 82.html │ │ ├── 83.html │ │ ├── 84.html │ │ ├── 85.html │ │ ├── 86.html │ │ ├── 87.html │ │ ├── 88.html │ │ ├── 89.html │ │ ├── 9.html │ │ ├── 90.html │ │ ├── 91.html │ │ ├── 92.html │ │ ├── 93.html │ │ ├── 94.html │ │ ├── 95.html │ │ ├── 96.html │ │ ├── 97.html │ │ ├── 98.html │ │ └── 99.html │ ├── wa │ │ ├── 0.html │ │ ├── 1.html │ │ ├── 10.html │ │ ├── 100.html │ │ ├── 101.html │ │ ├── 102.html │ │ ├── 103.html │ │ ├── 104.html │ │ ├── 105.html │ │ ├── 106.html │ │ ├── 107.html │ │ ├── 108.html │ │ ├── 109.html │ │ ├── 11.html │ │ ├── 110.html │ │ ├── 111.html │ │ ├── 112.html │ │ ├── 113.html │ │ ├── 114.html │ │ ├── 115.html │ │ ├── 116.html │ │ ├── 117.html │ │ ├── 118.html │ │ ├── 119.html │ │ ├── 12.html │ │ ├── 120.html │ │ ├── 121.html │ │ ├── 122.html │ │ ├── 123.html │ │ ├── 124.html │ │ ├── 125.html │ │ ├── 126.html │ │ ├── 127.html │ │ ├── 128.html │ │ ├── 129.html │ │ ├── 13.html │ │ ├── 130.html │ │ ├── 131.html │ │ ├── 132.html │ │ ├── 133.html │ │ ├── 134.html │ │ ├── 139.html │ │ ├── 14.html │ │ ├── 140.html │ │ ├── 141.html │ │ ├── 142.html │ │ ├── 143.html │ │ ├── 144.html │ │ ├── 145.html │ │ ├── 146.html │ │ ├── 147.html │ │ ├── 148.html │ │ ├── 149.html │ │ ├── 15.html │ │ ├── 150.html │ │ ├── 151.html │ │ ├── 152.html │ │ ├── 153.html │ │ ├── 154.html │ │ ├── 155.html │ │ ├── 156.html │ │ ├── 157.html │ │ ├── 158.html │ │ ├── 159.html │ │ ├── 16.html │ │ ├── 160.html │ │ ├── 161.html │ │ ├── 162.html │ │ ├── 163.html │ │ ├── 164.html │ │ ├── 165.html │ │ ├── 166.html │ │ ├── 167.html │ │ ├── 168.html │ │ ├── 169.html │ │ ├── 17.html │ │ ├── 170.html │ │ ├── 171.html │ │ ├── 172.html │ │ ├── 173.html │ │ ├── 174.html │ │ ├── 175.html │ │ ├── 176.html │ │ ├── 177.html │ │ ├── 178.html │ │ ├── 179.html │ │ ├── 18.html │ │ ├── 180.html │ │ ├── 181.html │ │ ├── 182.html │ │ ├── 183.html │ │ ├── 184.html │ │ ├── 185.html │ │ ├── 188.html │ │ ├── 189.html │ │ ├── 19.html │ │ ├── 190.html │ │ ├── 191.html │ │ ├── 192.html │ │ ├── 193.html │ │ ├── 194.html │ │ ├── 195.html │ │ ├── 196.html │ │ ├── 197.html │ │ ├── 198.html │ │ ├── 199.html │ │ ├── 2.html │ │ ├── 20.html │ │ ├── 200.html │ │ ├── 201.html │ │ ├── 206.html │ │ ├── 207.html │ │ ├── 208.html │ │ ├── 209.html │ │ ├── 21.html │ │ ├── 210.html │ │ ├── 211.html │ │ ├── 212.html │ │ ├── 213.html │ │ ├── 214.html │ │ ├── 215.html │ │ ├── 216.html │ │ ├── 217.html │ │ ├── 218.html │ │ ├── 219.html │ │ ├── 22.html │ │ ├── 220.html │ │ ├── 221.html │ │ ├── 222.html │ │ ├── 223.html │ │ ├── 224.html │ │ ├── 225.html │ │ ├── 226.html │ │ ├── 227.html │ │ ├── 228.html │ │ ├── 229.html │ │ ├── 23.html │ │ ├── 230.html │ │ ├── 231.html │ │ ├── 232.html │ │ ├── 233.html │ │ ├── 234.html │ │ ├── 235.html │ │ ├── 236.html │ │ ├── 237.html │ │ ├── 238.html │ │ ├── 239.html │ │ ├── 24.html │ │ ├── 240.html │ │ ├── 241.html │ │ ├── 242.html │ │ ├── 243.html │ │ ├── 244.html │ │ ├── 245.html │ │ ├── 246.html │ │ ├── 247.html │ │ ├── 248.html │ │ ├── 249.html │ │ ├── 25.html │ │ ├── 250.html │ │ ├── 251.html │ │ ├── 252.html │ │ ├── 253.html │ │ ├── 254.html │ │ ├── 255.html │ │ ├── 256.html │ │ ├── 257.html │ │ ├── 258.html │ │ ├── 259.html │ │ ├── 26.html │ │ ├── 260.html │ │ ├── 261.html │ │ ├── 262.html │ │ ├── 263.html │ │ ├── 264.html │ │ ├── 265.html │ │ ├── 266.html │ │ ├── 267.html │ │ ├── 268.html │ │ ├── 269.html │ │ ├── 27.html │ │ ├── 270.html │ │ ├── 271.html │ │ ├── 272.html │ │ ├── 273.html │ │ ├── 274.html │ │ ├── 275.html │ │ ├── 278.html │ │ ├── 28.html │ │ ├── 284.html │ │ ├── 285.html │ │ ├── 286.html │ │ ├── 287.html │ │ ├── 288.html │ │ ├── 289.html │ │ ├── 29.html │ │ ├── 290.html │ │ ├── 291.html │ │ ├── 292.html │ │ ├── 293.html │ │ ├── 294.html │ │ ├── 295.html │ │ ├── 296.html │ │ ├── 297.html │ │ ├── 298.html │ │ ├── 299.html │ │ ├── 3.html │ │ ├── 30.html │ │ ├── 300.html │ │ ├── 304.html │ │ ├── 305.html │ │ ├── 306.html │ │ ├── 307.html │ │ ├── 308.html │ │ ├── 309.html │ │ ├── 31.html │ │ ├── 310.html │ │ ├── 311.html │ │ ├── 312.html │ │ ├── 313.html │ │ ├── 314.html │ │ ├── 315.html │ │ ├── 316.html │ │ ├── 317.html │ │ ├── 318.html │ │ ├── 319.html │ │ ├── 32.html │ │ ├── 320.html │ │ ├── 321.html │ │ ├── 322.html │ │ ├── 323.html │ │ ├── 324.html │ │ ├── 325.html │ │ ├── 326.html │ │ ├── 327.html │ │ ├── 328.html │ │ ├── 329.html │ │ ├── 33.html │ │ ├── 330.html │ │ ├── 331.html │ │ ├── 332.html │ │ ├── 333.html │ │ ├── 334.html │ │ ├── 335.html │ │ ├── 336.html │ │ ├── 337.html │ │ ├── 338.html │ │ ├── 339.html │ │ ├── 34.html │ │ ├── 340.html │ │ ├── 341.html │ │ ├── 342.html │ │ ├── 343.html │ │ ├── 35.html │ │ ├── 36.html │ │ ├── 37.html │ │ ├── 38.html │ │ ├── 39.html │ │ ├── 4.html │ │ ├── 40.html │ │ ├── 41.html │ │ ├── 42.html │ │ ├── 43.html │ │ ├── 44.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 48.html │ │ ├── 49.html │ │ ├── 5.html │ │ ├── 50.html │ │ ├── 51.html │ │ ├── 52.html │ │ ├── 53.html │ │ ├── 54.html │ │ ├── 55.html │ │ ├── 56.html │ │ ├── 57.html │ │ ├── 58.html │ │ ├── 59.html │ │ ├── 6.html │ │ ├── 60.html │ │ ├── 61.html │ │ ├── 62.html │ │ ├── 63.html │ │ ├── 64.html │ │ ├── 65.html │ │ ├── 66.html │ │ ├── 67.html │ │ ├── 68.html │ │ ├── 69.html │ │ ├── 7.html │ │ ├── 70.html │ │ ├── 72.html │ │ ├── 73.html │ │ ├── 74.html │ │ ├── 75.html │ │ ├── 76.html │ │ ├── 77.html │ │ ├── 78.html │ │ ├── 79.html │ │ ├── 8.html │ │ ├── 80.html │ │ ├── 81.html │ │ ├── 82.html │ │ ├── 83.html │ │ ├── 84.html │ │ ├── 85.html │ │ ├── 86.html │ │ ├── 87.html │ │ ├── 88.html │ │ ├── 89.html │ │ ├── 9.html │ │ ├── 90.html │ │ ├── 91.html │ │ ├── 92.html │ │ ├── 93.html │ │ ├── 94.html │ │ ├── 95.html │ │ ├── 96.html │ │ ├── 97.html │ │ ├── 98.html │ │ ├── 99.html │ │ └── hold │ │ │ ├── 135.html │ │ │ ├── 136.html │ │ │ ├── 137.html │ │ │ ├── 138.html │ │ │ ├── 186.html │ │ │ ├── 187.html │ │ │ ├── 202.html │ │ │ ├── 276.html │ │ │ ├── 280.html │ │ │ └── todo │ │ │ ├── 203.html │ │ │ ├── 204.html │ │ │ ├── 205.html │ │ │ ├── 277.html │ │ │ ├── 279.html │ │ │ ├── 281.html │ │ │ ├── 282.html │ │ │ └── 283.html │ └── webstruct.dtd ├── nl │ ├── README.rst │ ├── cleaned │ │ ├── 0.html │ │ ├── 1.html │ │ ├── 103.html │ │ ├── 104.html │ │ ├── 105.html │ │ ├── 115.html │ │ ├── 116.html │ │ ├── 12.html │ │ ├── 124.html │ │ ├── 129.html │ │ ├── 130.html │ │ ├── 135.html │ │ ├── 14.html │ │ ├── 140.html │ │ ├── 145.html │ │ ├── 153.html │ │ ├── 159.html │ │ ├── 162.html │ │ ├── 165.html │ │ ├── 166.html │ │ ├── 168.html │ │ ├── 18.html │ │ ├── 187.html │ │ ├── 188.html │ │ ├── 19.html │ │ ├── 192.html │ │ ├── 198.html │ │ ├── 20.html │ │ ├── 210.html │ │ ├── 212.html │ │ ├── 214.html │ │ ├── 218.html │ │ ├── 219.html │ │ ├── 22.html │ │ ├── 228.html │ │ ├── 231.html │ │ ├── 235.html │ │ ├── 241.html │ │ ├── 242.html │ │ ├── 243.html │ │ ├── 249.html │ │ ├── 25.html │ │ ├── 252.html │ │ ├── 257.html │ │ ├── 261.html │ │ ├── 262.html │ │ ├── 263.html │ │ ├── 266.html │ │ ├── 267.html │ │ ├── 30.html │ │ ├── 302.html │ │ ├── 306.html │ │ ├── 315.html │ │ ├── 316.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 61.html │ │ ├── 69.html │ │ ├── 83.html │ │ ├── 87.html │ │ ├── 90.html │ │ ├── 96.html │ │ └── 97.html │ ├── source │ │ ├── 0.html │ │ ├── 1.html │ │ ├── 103.html │ │ ├── 104.html │ │ ├── 105.html │ │ ├── 115.html │ │ ├── 116.html │ │ ├── 12.html │ │ ├── 124.html │ │ ├── 129.html │ │ ├── 130.html │ │ ├── 135.html │ │ ├── 14.html │ │ ├── 140.html │ │ ├── 145.html │ │ ├── 153.html │ │ ├── 159.html │ │ ├── 162.html │ │ ├── 165.html │ │ ├── 166.html │ │ ├── 168.html │ │ ├── 18.html │ │ ├── 187.html │ │ ├── 188.html │ │ ├── 19.html │ │ ├── 192.html │ │ ├── 198.html │ │ ├── 20.html │ │ ├── 210.html │ │ ├── 212.html │ │ ├── 214.html │ │ ├── 218.html │ │ ├── 219.html │ │ ├── 22.html │ │ ├── 228.html │ │ ├── 231.html │ │ ├── 235.html │ │ ├── 241.html │ │ ├── 242.html │ │ ├── 243.html │ │ ├── 249.html │ │ ├── 25.html │ │ ├── 252.html │ │ ├── 257.html │ │ ├── 261.html │ │ ├── 262.html │ │ ├── 263.html │ │ ├── 266.html │ │ ├── 267.html │ │ ├── 30.html │ │ ├── 302.html │ │ ├── 306.html │ │ ├── 315.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 61.html │ │ ├── 69.html │ │ ├── 83.html │ │ ├── 87.html │ │ ├── 90.html │ │ ├── 96.html │ │ └── 97.html │ ├── wa │ │ ├── 0.html │ │ ├── 1.html │ │ ├── 103.html │ │ ├── 104.html │ │ ├── 115.html │ │ ├── 116.html │ │ ├── 124.html │ │ ├── 129.html │ │ ├── 130.html │ │ ├── 135.html │ │ ├── 14.html │ │ ├── 140.html │ │ ├── 145.html │ │ ├── 153.html │ │ ├── 159.html │ │ ├── 162.html │ │ ├── 165.html │ │ ├── 166.html │ │ ├── 168.html │ │ ├── 18.html │ │ ├── 187.html │ │ ├── 188.html │ │ ├── 19.html │ │ ├── 192.html │ │ ├── 198.html │ │ ├── 20.html │ │ ├── 210.html │ │ ├── 212.html │ │ ├── 214.html │ │ ├── 218.html │ │ ├── 219.html │ │ ├── 22.html │ │ ├── 228.html │ │ ├── 231.html │ │ ├── 235.html │ │ ├── 241.html │ │ ├── 242.html │ │ ├── 243.html │ │ ├── 25.html │ │ ├── 252.html │ │ ├── 262.html │ │ ├── 263.html │ │ ├── 266.html │ │ ├── 267.html │ │ ├── 30.html │ │ ├── 302.html │ │ ├── 306.html │ │ ├── 315.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 61.html │ │ ├── 69.html │ │ ├── 83.html │ │ ├── 87.html │ │ ├── 96.html │ │ └── 97.html │ └── websites.txt ├── random_pages │ ├── README.rst │ ├── cleaned │ │ ├── 1.html │ │ ├── 10.html │ │ ├── 11.html │ │ ├── 12.html │ │ ├── 13.html │ │ ├── 14.html │ │ ├── 15.html │ │ ├── 16.html │ │ ├── 17.html │ │ ├── 18.html │ │ ├── 19.html │ │ ├── 2.html │ │ ├── 20.html │ │ ├── 21.html │ │ ├── 22.html │ │ ├── 23.html │ │ ├── 24.html │ │ ├── 25.html │ │ ├── 26.html │ │ ├── 27.html │ │ ├── 28.html │ │ ├── 29.html │ │ ├── 3.html │ │ ├── 30.html │ │ ├── 31.html │ │ ├── 32.html │ │ ├── 33.html │ │ ├── 34.html │ │ ├── 35.html │ │ ├── 36.html │ │ ├── 37.html │ │ ├── 38.html │ │ ├── 39.html │ │ ├── 4.html │ │ ├── 40.html │ │ ├── 41.html │ │ ├── 42.html │ │ ├── 43.html │ │ ├── 44.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 48.html │ │ ├── 49.html │ │ ├── 5.html │ │ ├── 50.html │ │ ├── 51.html │ │ ├── 52.html │ │ ├── 53.html │ │ ├── 54.html │ │ ├── 6.html │ │ ├── 7.html │ │ ├── 8.html │ │ └── 9.html │ ├── source │ │ ├── 1.html │ │ ├── 10.html │ │ ├── 11.html │ │ ├── 12.html │ │ ├── 13.html │ │ ├── 14.html │ │ ├── 15.html │ │ ├── 16.html │ │ ├── 17.html │ │ ├── 18.html │ │ ├── 19.html │ │ ├── 2.html │ │ ├── 20.html │ │ ├── 21.html │ │ ├── 22.html │ │ ├── 23.html │ │ ├── 24.html │ │ ├── 25.html │ │ ├── 26.html │ │ ├── 27.html │ │ ├── 28.html │ │ ├── 29.html │ │ ├── 3.html │ │ ├── 30.html │ │ ├── 31.html │ │ ├── 32.html │ │ ├── 33.html │ │ ├── 34.html │ │ ├── 35.html │ │ ├── 36.html │ │ ├── 37.html │ │ ├── 38.html │ │ ├── 39.html │ │ ├── 4.html │ │ ├── 40.html │ │ ├── 41.html │ │ ├── 42.html │ │ ├── 43.html │ │ ├── 44.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 48.html │ │ ├── 49.html │ │ ├── 5.html │ │ ├── 50.html │ │ ├── 51.html │ │ ├── 52.html │ │ ├── 53.html │ │ ├── 54.html │ │ ├── 6.html │ │ ├── 7.html │ │ ├── 8.html │ │ └── 9.html │ └── wa │ │ ├── 1.html │ │ ├── 10.html │ │ ├── 12.html │ │ ├── 13.html │ │ ├── 14.html │ │ ├── 15.html │ │ ├── 16.html │ │ ├── 17.html │ │ ├── 18.html │ │ ├── 19.html │ │ ├── 2.html │ │ ├── 20.html │ │ ├── 21.html │ │ ├── 22.html │ │ ├── 23.html │ │ ├── 24.html │ │ ├── 25.html │ │ ├── 26.html │ │ ├── 27.html │ │ ├── 28.html │ │ ├── 29.html │ │ ├── 3.html │ │ ├── 30.html │ │ ├── 31.html │ │ ├── 32.html │ │ ├── 33.html │ │ ├── 34.html │ │ ├── 35.html │ │ ├── 36.html │ │ ├── 37.html │ │ ├── 38.html │ │ ├── 39.html │ │ ├── 4.html │ │ ├── 40.html │ │ ├── 41.html │ │ ├── 42.html │ │ ├── 43.html │ │ ├── 44.html │ │ ├── 45.html │ │ ├── 46.html │ │ ├── 47.html │ │ ├── 48.html │ │ ├── 49.html │ │ ├── 5.html │ │ ├── 50.html │ │ ├── 51.html │ │ ├── 52.html │ │ ├── 53.html │ │ ├── 54.html │ │ ├── 6.html │ │ ├── 7.html │ │ ├── 8.html │ │ └── 9.html └── us_contact_pages │ ├── README.rst │ ├── cleaned │ ├── 1.html │ ├── 10.html │ ├── 100.html │ ├── 101.html │ ├── 102.html │ ├── 11.html │ ├── 12.html │ ├── 13.html │ ├── 14.html │ ├── 15.html │ ├── 16.html │ ├── 17.html │ ├── 18.html │ ├── 19.html │ ├── 2.html │ ├── 20.html │ ├── 21.html │ ├── 22.html │ ├── 23.html │ ├── 24.html │ ├── 25.html │ ├── 26.html │ ├── 27.html │ ├── 28.html │ ├── 29.html │ ├── 3.html │ ├── 30.html │ ├── 31.html │ ├── 32.html │ ├── 33.html │ ├── 34.html │ ├── 35.html │ ├── 36.html │ ├── 37.html │ ├── 38.html │ ├── 39.html │ ├── 4.html │ ├── 40.html │ ├── 41.html │ ├── 42.html │ ├── 43.html │ ├── 44.html │ ├── 46.html │ ├── 47.html │ ├── 48.html │ ├── 49.html │ ├── 5.html │ ├── 50.html │ ├── 51.html │ ├── 52.html │ ├── 53.html │ ├── 54.html │ ├── 55.html │ ├── 56.html │ ├── 57.html │ ├── 58.html │ ├── 59.html │ ├── 6.html │ ├── 60.html │ ├── 61.html │ ├── 62.html │ ├── 63.html │ ├── 64.html │ ├── 65.html │ ├── 66.html │ ├── 67.html │ ├── 68.html │ ├── 69.html │ ├── 7.html │ ├── 70.html │ ├── 71.html │ ├── 72.html │ ├── 73.html │ ├── 74.html │ ├── 75.html │ ├── 76.html │ ├── 77.html │ ├── 78.html │ ├── 79.html │ ├── 8.html │ ├── 80.html │ ├── 81.html │ ├── 82.html │ ├── 83.html │ ├── 84.html │ ├── 85.html │ ├── 86.html │ ├── 87.html │ ├── 88.html │ ├── 89.html │ ├── 9.html │ ├── 90.html │ ├── 91.html │ ├── 92.html │ ├── 93.html │ ├── 94.html │ ├── 95.html │ ├── 96.html │ ├── 97.html │ ├── 98.html │ └── 99.html │ ├── source │ ├── 1.html │ ├── 10.html │ ├── 100.html │ ├── 101.html │ ├── 102.html │ ├── 11.html │ ├── 12.html │ ├── 13.html │ ├── 14.html │ ├── 15.html │ ├── 16.html │ ├── 17.html │ ├── 18.html │ ├── 19.html │ ├── 2.html │ ├── 20.html │ ├── 21.html │ ├── 22.html │ ├── 23.html │ ├── 24.html │ ├── 25.html │ ├── 26.html │ ├── 27.html │ ├── 28.html │ ├── 29.html │ ├── 3.html │ ├── 30.html │ ├── 31.html │ ├── 32.html │ ├── 33.html │ ├── 34.html │ ├── 35.html │ ├── 36.html │ ├── 37.html │ ├── 38.html │ ├── 39.html │ ├── 4.html │ ├── 40.html │ ├── 41.html │ ├── 42.html │ ├── 43.html │ ├── 44.html │ ├── 46.html │ ├── 47.html │ ├── 48.html │ ├── 49.html │ ├── 5.html │ ├── 50.html │ ├── 51.html │ ├── 52.html │ ├── 53.html │ ├── 54.html │ ├── 55.html │ ├── 56.html │ ├── 57.html │ ├── 58.html │ ├── 59.html │ ├── 6.html │ ├── 60.html │ ├── 61.html │ ├── 62.html │ ├── 63.html │ ├── 64.html │ ├── 65.html │ ├── 66.html │ ├── 67.html │ ├── 68.html │ ├── 69.html │ ├── 7.html │ ├── 70.html │ ├── 71.html │ ├── 72.html │ ├── 73.html │ ├── 74.html │ ├── 75.html │ ├── 76.html │ ├── 77.html │ ├── 78.html │ ├── 79.html │ ├── 8.html │ ├── 80.html │ ├── 81.html │ ├── 82.html │ ├── 83.html │ ├── 84.html │ ├── 85.html │ ├── 86.html │ ├── 87.html │ ├── 88.html │ ├── 89.html │ ├── 9.html │ ├── 90.html │ ├── 91.html │ ├── 92.html │ ├── 93.html │ ├── 94.html │ ├── 95.html │ ├── 96.html │ ├── 97.html │ ├── 98.html │ └── 99.html │ └── wa │ ├── 1.html │ ├── 10.html │ ├── 100.html │ ├── 101.html │ ├── 102.html │ ├── 11.html │ ├── 12.html │ ├── 13.html │ ├── 14.html │ ├── 15.html │ ├── 16.html │ ├── 17.html │ ├── 18.html │ ├── 19.html │ ├── 2.html │ ├── 20.html │ ├── 21.html │ ├── 22.html │ ├── 23.html │ ├── 24.html │ ├── 25.html │ ├── 26.html │ ├── 27.html │ ├── 28.html │ ├── 29.html │ ├── 3.html │ ├── 30.html │ ├── 31.html │ ├── 32.html │ ├── 33.html │ ├── 34.html │ ├── 35.html │ ├── 36.html │ ├── 37.html │ ├── 38.html │ ├── 39.html │ ├── 4.html │ ├── 40.html │ ├── 41.html │ ├── 42.html │ ├── 43.html │ ├── 44.html │ ├── 46.html │ ├── 47.html │ ├── 48.html │ ├── 49.html │ ├── 5.html │ ├── 50.html │ ├── 51.html │ ├── 52.html │ ├── 53.html │ ├── 54.html │ ├── 55.html │ ├── 56.html │ ├── 57.html │ ├── 58.html │ ├── 59.html │ ├── 6.html │ ├── 60.html │ ├── 61.html │ ├── 62.html │ ├── 63.html │ ├── 64.html │ ├── 65.html │ ├── 66.html │ ├── 67.html │ ├── 68.html │ ├── 69.html │ ├── 7.html │ ├── 70.html │ ├── 71.html │ ├── 72.html │ ├── 73.html │ ├── 74.html │ ├── 75.html │ ├── 76.html │ ├── 77.html │ ├── 78.html │ ├── 79.html │ ├── 8.html │ ├── 80.html │ ├── 81.html │ ├── 82.html │ ├── 83.html │ ├── 84.html │ ├── 85.html │ ├── 86.html │ ├── 87.html │ ├── 88.html │ ├── 89.html │ ├── 9.html │ ├── 90.html │ ├── 91.html │ ├── 92.html │ ├── 93.html │ ├── 94.html │ ├── 95.html │ ├── 96.html │ ├── 97.html │ ├── 98.html │ └── 99.html ├── gazetteers └── countries │ ├── README.rst │ └── countries.txt └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/.gitignore -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/.travis.yml -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/CHANGES.rst -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/README.rst -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/codecov.yml -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/Makefile -------------------------------------------------------------------------------- /docs/changes.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGES.rst 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/conf.py -------------------------------------------------------------------------------- /docs/images/wa-buttons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/images/wa-buttons.png -------------------------------------------------------------------------------- /docs/images/wa-options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/images/wa-options.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/index.rst -------------------------------------------------------------------------------- /docs/intro.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/intro.rst -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/make.bat -------------------------------------------------------------------------------- /docs/ref/base.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/base.rst -------------------------------------------------------------------------------- /docs/ref/crfsuite.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/crfsuite.rst -------------------------------------------------------------------------------- /docs/ref/features.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/features.rst -------------------------------------------------------------------------------- /docs/ref/grouping.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/grouping.rst -------------------------------------------------------------------------------- /docs/ref/index.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/index.rst -------------------------------------------------------------------------------- /docs/ref/loaders.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/loaders.rst -------------------------------------------------------------------------------- /docs/ref/metrics.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/metrics.rst -------------------------------------------------------------------------------- /docs/ref/misc.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/misc.rst -------------------------------------------------------------------------------- /docs/ref/model.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/model.rst -------------------------------------------------------------------------------- /docs/ref/wapiti.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/wapiti.rst -------------------------------------------------------------------------------- /docs/ref/webannotator.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/ref/webannotator.rst -------------------------------------------------------------------------------- /docs/tutorial.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/docs/tutorial.rst -------------------------------------------------------------------------------- /example/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/README.rst -------------------------------------------------------------------------------- /example/ner/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /example/ner/build_gazetteers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/ner/build_gazetteers.py -------------------------------------------------------------------------------- /example/ner/cv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/ner/cv.py -------------------------------------------------------------------------------- /example/ner/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/ner/data.py -------------------------------------------------------------------------------- /example/ner/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/ner/train.py -------------------------------------------------------------------------------- /example/ner/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/ner/utils.py -------------------------------------------------------------------------------- /example/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/example/requirements.txt -------------------------------------------------------------------------------- /ideas.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/ideas.rst -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/requirements-dev.txt -------------------------------------------------------------------------------- /requirements-doc.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/requirements-doc.txt -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/setup.py -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/tox.ini -------------------------------------------------------------------------------- /webstruct/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/__init__.py -------------------------------------------------------------------------------- /webstruct/_fileresource.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/_fileresource.py -------------------------------------------------------------------------------- /webstruct/annotation_converter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/annotation_converter.py -------------------------------------------------------------------------------- /webstruct/annotation_verifier.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/annotation_verifier.py -------------------------------------------------------------------------------- /webstruct/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/base.py -------------------------------------------------------------------------------- /webstruct/crfsuite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/crfsuite.py -------------------------------------------------------------------------------- /webstruct/feature_extraction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/feature_extraction.py -------------------------------------------------------------------------------- /webstruct/features/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/features/__init__.py -------------------------------------------------------------------------------- /webstruct/features/block_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/features/block_features.py -------------------------------------------------------------------------------- /webstruct/features/data_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/features/data_features.py -------------------------------------------------------------------------------- /webstruct/features/datetime_format.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/features/datetime_format.py -------------------------------------------------------------------------------- /webstruct/features/global_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/features/global_features.py -------------------------------------------------------------------------------- /webstruct/features/token_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/features/token_features.py -------------------------------------------------------------------------------- /webstruct/gazetteers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/gazetteers/__init__.py -------------------------------------------------------------------------------- /webstruct/gazetteers/features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/gazetteers/features.py -------------------------------------------------------------------------------- /webstruct/gazetteers/geonames.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/gazetteers/geonames.py -------------------------------------------------------------------------------- /webstruct/grouping.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/grouping.py -------------------------------------------------------------------------------- /webstruct/html_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/html_tokenizer.py -------------------------------------------------------------------------------- /webstruct/html_tokenizer_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/html_tokenizer_benchmark.py -------------------------------------------------------------------------------- /webstruct/infer_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/infer_domain.py -------------------------------------------------------------------------------- /webstruct/loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/loaders.py -------------------------------------------------------------------------------- /webstruct/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/metrics.py -------------------------------------------------------------------------------- /webstruct/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/model.py -------------------------------------------------------------------------------- /webstruct/model_benchmark.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/model_benchmark.py -------------------------------------------------------------------------------- /webstruct/sequence_encoding.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/sequence_encoding.py -------------------------------------------------------------------------------- /webstruct/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/__init__.py -------------------------------------------------------------------------------- /webstruct/tests/data/wa1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/data/wa1.html -------------------------------------------------------------------------------- /webstruct/tests/data/wa2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/data/wa2.html -------------------------------------------------------------------------------- /webstruct/tests/test_crfsuite.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_crfsuite.py -------------------------------------------------------------------------------- /webstruct/tests/test_html_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_html_tokenizer.py -------------------------------------------------------------------------------- /webstruct/tests/test_html_tools.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_html_tools.py -------------------------------------------------------------------------------- /webstruct/tests/test_infer_domain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_infer_domain.py -------------------------------------------------------------------------------- /webstruct/tests/test_loaders.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_loaders.py -------------------------------------------------------------------------------- /webstruct/tests/test_pattern_features.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_pattern_features.py -------------------------------------------------------------------------------- /webstruct/tests/test_text_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_text_tokenizer.py -------------------------------------------------------------------------------- /webstruct/tests/test_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_utils.py -------------------------------------------------------------------------------- /webstruct/tests/test_wapiti.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_wapiti.py -------------------------------------------------------------------------------- /webstruct/tests/test_webannotator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/test_webannotator.py -------------------------------------------------------------------------------- /webstruct/tests/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/tests/utils.py -------------------------------------------------------------------------------- /webstruct/text_tokenizers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/text_tokenizers.py -------------------------------------------------------------------------------- /webstruct/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/utils.py -------------------------------------------------------------------------------- /webstruct/wapiti.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/wapiti.py -------------------------------------------------------------------------------- /webstruct/webannotator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct/webannotator.py -------------------------------------------------------------------------------- /webstruct_data/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/README.rst -------------------------------------------------------------------------------- /webstruct_data/clean_html.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/clean_html.py -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/README.rst -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/0.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/0.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/2.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/3.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/4.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/5.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/6.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/7.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/8.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/source/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/source/9.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/0.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/0.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/10.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/10.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/100.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/100.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/101.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/101.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/102.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/102.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/103.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/103.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/104.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/104.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/105.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/105.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/106.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/106.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/107.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/107.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/108.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/108.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/109.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/109.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/11.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/11.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/110.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/110.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/111.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/111.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/112.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/112.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/113.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/113.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/114.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/114.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/115.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/115.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/116.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/116.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/117.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/117.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/118.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/118.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/119.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/119.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/120.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/120.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/121.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/121.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/122.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/122.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/123.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/123.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/124.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/124.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/125.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/125.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/126.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/126.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/127.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/127.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/128.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/128.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/129.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/129.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/13.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/13.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/130.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/130.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/131.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/131.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/132.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/132.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/133.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/133.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/134.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/134.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/139.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/139.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/140.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/140.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/141.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/141.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/142.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/142.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/143.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/143.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/144.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/144.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/145.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/145.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/146.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/146.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/147.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/147.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/148.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/148.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/149.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/149.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/15.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/15.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/150.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/150.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/151.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/151.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/152.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/152.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/153.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/153.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/154.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/154.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/155.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/155.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/156.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/156.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/157.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/157.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/158.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/158.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/159.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/159.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/16.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/160.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/160.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/161.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/161.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/162.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/162.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/163.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/163.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/164.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/164.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/165.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/165.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/166.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/166.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/167.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/167.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/168.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/168.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/169.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/169.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/17.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/17.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/170.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/170.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/171.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/171.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/172.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/172.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/173.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/173.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/174.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/174.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/175.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/175.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/176.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/176.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/177.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/177.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/178.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/178.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/179.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/179.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/180.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/180.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/181.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/181.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/182.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/182.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/183.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/183.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/184.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/184.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/185.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/185.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/188.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/188.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/189.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/189.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/190.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/190.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/191.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/191.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/192.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/192.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/193.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/193.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/194.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/194.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/195.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/195.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/196.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/196.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/197.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/197.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/198.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/198.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/199.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/199.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/2.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/200.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/200.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/201.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/201.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/206.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/206.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/207.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/207.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/208.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/208.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/209.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/209.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/21.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/21.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/210.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/210.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/211.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/211.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/212.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/212.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/213.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/213.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/214.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/214.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/215.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/215.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/216.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/216.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/217.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/217.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/218.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/218.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/219.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/219.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/220.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/220.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/221.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/221.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/222.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/222.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/223.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/223.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/224.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/224.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/225.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/225.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/226.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/226.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/227.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/227.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/228.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/228.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/229.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/229.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/23.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/23.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/230.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/230.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/231.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/231.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/232.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/232.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/233.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/233.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/234.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/234.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/235.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/235.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/236.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/236.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/237.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/237.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/238.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/238.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/239.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/239.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/24.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/24.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/240.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/240.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/241.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/241.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/242.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/242.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/243.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/243.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/244.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/244.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/245.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/245.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/246.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/246.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/247.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/247.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/248.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/248.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/249.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/249.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/250.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/250.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/251.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/251.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/252.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/252.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/253.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/253.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/254.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/254.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/255.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/255.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/256.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/256.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/257.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/257.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/258.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/258.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/259.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/259.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/26.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/26.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/260.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/260.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/261.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/261.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/262.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/262.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/263.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/263.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/264.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/264.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/265.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/265.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/266.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/266.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/267.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/267.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/268.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/268.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/269.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/269.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/27.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/27.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/270.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/270.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/271.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/271.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/272.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/272.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/273.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/273.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/274.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/274.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/275.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/275.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/278.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/278.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/28.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/28.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/284.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/284.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/285.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/285.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/286.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/286.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/287.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/287.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/288.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/288.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/289.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/289.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/29.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/29.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/290.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/290.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/291.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/291.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/292.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/292.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/293.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/293.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/294.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/294.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/295.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/295.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/296.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/296.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/297.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/297.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/298.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/298.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/299.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/299.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/3.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/300.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/300.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/304.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/304.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/305.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/305.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/306.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/306.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/307.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/307.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/308.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/308.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/309.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/309.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/31.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/31.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/310.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/310.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/311.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/311.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/312.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/312.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/313.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/313.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/314.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/314.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/315.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/315.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/316.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/316.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/317.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/317.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/318.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/318.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/319.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/319.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/32.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/320.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/320.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/321.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/321.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/322.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/322.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/323.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/323.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/324.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/324.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/325.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/325.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/326.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/326.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/327.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/327.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/328.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/328.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/329.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/329.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/33.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/33.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/330.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/330.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/331.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/331.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/332.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/332.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/333.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/333.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/334.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/334.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/335.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/335.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/336.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/336.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/337.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/337.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/338.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/338.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/339.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/339.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/34.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/34.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/340.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/340.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/341.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/341.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/342.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/342.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/343.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/343.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/35.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/35.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/36.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/36.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/37.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/37.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/38.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/38.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/39.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/39.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/4.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/40.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/40.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/41.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/41.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/42.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/42.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/43.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/43.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/44.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/44.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/48.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/48.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/49.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/49.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/5.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/50.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/50.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/51.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/51.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/52.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/52.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/53.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/53.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/54.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/54.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/55.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/55.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/56.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/56.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/57.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/57.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/58.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/58.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/59.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/59.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/6.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/60.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/60.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/61.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/61.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/62.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/62.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/63.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/63.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/64.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/64.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/65.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/65.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/66.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/66.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/67.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/67.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/68.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/68.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/69.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/69.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/7.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/70.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/70.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/72.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/72.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/73.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/73.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/74.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/74.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/75.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/75.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/76.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/76.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/77.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/77.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/78.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/78.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/79.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/79.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/8.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/80.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/80.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/81.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/81.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/82.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/82.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/83.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/83.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/84.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/84.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/85.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/85.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/86.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/86.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/87.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/87.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/88.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/88.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/89.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/89.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/9.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/90.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/90.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/91.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/91.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/92.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/92.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/93.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/93.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/94.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/94.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/95.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/95.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/96.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/96.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/97.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/97.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/98.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/98.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/wa/99.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/wa/99.html -------------------------------------------------------------------------------- /webstruct_data/corpus/business_pages/webstruct.dtd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/business_pages/webstruct.dtd -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/README.rst -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/0.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/0.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/103.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/103.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/104.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/104.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/105.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/105.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/115.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/115.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/116.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/116.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/124.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/124.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/129.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/129.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/130.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/130.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/135.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/135.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/140.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/140.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/145.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/145.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/153.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/153.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/159.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/159.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/162.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/162.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/165.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/165.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/166.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/166.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/168.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/168.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/187.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/187.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/188.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/188.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/192.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/192.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/198.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/198.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/210.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/210.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/212.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/212.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/214.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/214.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/218.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/218.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/219.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/219.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/228.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/228.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/231.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/231.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/235.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/235.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/241.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/241.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/242.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/242.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/243.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/243.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/249.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/249.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/252.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/252.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/257.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/257.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/261.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/261.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/262.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/262.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/263.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/263.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/266.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/266.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/267.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/267.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/302.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/302.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/306.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/306.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/315.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/315.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/316.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/316.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/61.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/61.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/69.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/69.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/83.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/83.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/87.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/87.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/90.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/90.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/96.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/96.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/cleaned/97.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/cleaned/97.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/0.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/0.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/103.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/103.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/104.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/104.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/105.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/105.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/115.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/115.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/116.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/116.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/124.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/124.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/129.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/129.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/130.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/130.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/135.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/135.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/140.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/140.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/145.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/145.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/153.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/153.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/159.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/159.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/162.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/162.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/165.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/165.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/166.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/166.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/168.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/168.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/187.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/187.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/188.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/188.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/192.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/192.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/198.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/198.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/210.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/210.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/212.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/212.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/214.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/214.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/218.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/218.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/219.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/219.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/228.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/228.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/231.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/231.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/235.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/235.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/241.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/241.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/242.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/242.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/243.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/243.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/249.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/249.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/252.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/252.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/257.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/257.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/261.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/261.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/262.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/262.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/263.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/263.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/266.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/266.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/267.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/267.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/302.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/302.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/306.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/306.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/315.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/315.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/61.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/61.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/69.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/69.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/83.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/83.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/87.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/87.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/90.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/90.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/96.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/96.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/source/97.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/source/97.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/0.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/0.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/103.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/103.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/104.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/104.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/115.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/115.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/116.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/116.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/124.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/124.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/129.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/129.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/130.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/130.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/135.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/135.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/140.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/140.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/145.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/145.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/153.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/153.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/159.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/159.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/162.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/162.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/165.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/165.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/166.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/166.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/168.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/168.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/187.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/187.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/188.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/188.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/192.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/192.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/198.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/198.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/210.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/210.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/212.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/212.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/214.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/214.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/218.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/218.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/219.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/219.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/228.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/228.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/231.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/231.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/235.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/235.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/241.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/241.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/242.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/242.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/243.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/243.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/252.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/252.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/262.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/262.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/263.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/263.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/266.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/266.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/267.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/267.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/302.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/302.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/306.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/306.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/315.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/315.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/61.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/61.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/69.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/69.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/83.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/83.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/87.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/87.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/96.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/96.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/wa/97.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/wa/97.html -------------------------------------------------------------------------------- /webstruct_data/corpus/nl/websites.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/nl/websites.txt -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/README.rst -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/10.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/10.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/11.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/11.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/13.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/13.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/15.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/15.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/16.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/17.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/17.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/2.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/21.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/21.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/23.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/23.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/24.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/24.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/26.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/26.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/27.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/27.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/28.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/28.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/29.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/29.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/3.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/31.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/31.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/32.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/33.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/33.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/34.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/34.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/35.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/35.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/36.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/36.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/37.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/37.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/38.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/38.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/39.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/39.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/4.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/40.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/40.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/41.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/41.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/42.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/42.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/43.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/43.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/44.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/44.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/48.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/48.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/49.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/49.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/5.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/50.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/50.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/51.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/51.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/52.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/52.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/53.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/53.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/54.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/54.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/6.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/7.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/8.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/cleaned/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/cleaned/9.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/10.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/10.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/11.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/11.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/13.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/13.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/15.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/15.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/16.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/17.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/17.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/2.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/21.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/21.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/23.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/23.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/24.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/24.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/26.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/26.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/27.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/27.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/28.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/28.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/29.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/29.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/3.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/31.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/31.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/32.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/33.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/33.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/34.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/34.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/35.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/35.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/36.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/36.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/37.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/37.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/38.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/38.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/39.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/39.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/4.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/40.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/40.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/41.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/41.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/42.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/42.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/43.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/43.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/44.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/44.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/48.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/48.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/49.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/49.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/5.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/50.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/50.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/51.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/51.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/52.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/52.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/53.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/53.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/54.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/54.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/6.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/7.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/8.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/source/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/source/9.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/10.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/10.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/13.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/13.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/15.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/15.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/16.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/17.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/17.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/18.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/18.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/19.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/19.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/2.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/20.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/20.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/21.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/21.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/22.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/22.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/23.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/23.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/24.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/24.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/25.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/25.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/26.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/26.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/27.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/27.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/28.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/28.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/29.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/29.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/3.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/30.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/30.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/31.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/31.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/32.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/32.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/33.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/33.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/34.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/34.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/35.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/35.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/36.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/36.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/37.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/37.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/38.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/38.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/39.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/39.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/4.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/40.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/40.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/41.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/41.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/42.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/42.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/43.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/43.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/44.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/44.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/45.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/45.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/46.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/46.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/47.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/47.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/48.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/48.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/49.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/49.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/5.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/50.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/50.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/51.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/51.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/52.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/52.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/53.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/53.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/54.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/54.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/6.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/7.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/8.html -------------------------------------------------------------------------------- /webstruct_data/corpus/random_pages/wa/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/random_pages/wa/9.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/README.rst -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/1.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/1.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/10.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/10.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/100.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/100.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/101.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/101.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/102.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/102.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/11.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/11.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/12.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/12.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/13.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/13.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/14.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/14.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/15.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/15.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/16.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/16.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/2.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/2.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/3.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/4.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/4.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/5.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/5.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/6.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/6.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/7.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/7.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/8.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/8.html -------------------------------------------------------------------------------- /webstruct_data/corpus/us_contact_pages/wa/9.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/corpus/us_contact_pages/wa/9.html -------------------------------------------------------------------------------- /webstruct_data/gazetteers/countries/README.rst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scrapinghub/webstruct/HEAD/webstruct_data/gazetteers/countries/README.rst -------------------------------------------------------------------------------- /webstruct_data/requirements.txt: -------------------------------------------------------------------------------- 1 | lxml 2 | docopt 3 | w3lib 4 | --------------------------------------------------------------------------------