├── LICENSE ├── README.md ├── index.html ├── index.js └── styles.css /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Julio J. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # regex-web-scraping 2 | Web Scraping con Expresiones Regulares ... ¡nivel Ninja! 3 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Juegos PS4 con descuento 7 | 8 | 9 | 10 | 11 | 12 |

Juegos de PS4 con +40% de descuento en ML

13 |
14 | 15 |
16 |
17 |
18 | 19 |
Demostración de Web Scraping con JavaScript y Expresiones Regulares.
20 |
by @jjyepez
21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /* 2 | Demostración de Web Scraping con Javascript y Expresiones Regulares. 3 | Autor: Julio J. - @jjyepez 4 | Fecha: 24/06/2018 5 | */ 6 | 7 | // --- si quieres probar con otros criterios de filtrado, remplaza la URL obtenida en ML en la sig linea --- jjy 8 | const urlML = "https://videojuegos.mercadolibre.com.co/juego-ps4_Envio_Gratis_DisplayType_LF_OrderId_PRICE*DESC_Discount_40-100" 9 | 10 | iniciar() 11 | 12 | function iniciar(){ 13 | traerHTML( urlML ) 14 | actualizarFecha() 15 | } 16 | 17 | function traerHTML( urlML ){ 18 | // --- servicio propio para superar restricciones CORS ... by jjyepez 19 | const URL = "https://noesishosting.com/sw/cors/?a=cors&url=" + urlML 20 | 21 | // --- traer el HTML del sitio web 22 | fetch( URL ) 23 | .then( rsp => rsp.text() ) 24 | .then( rslt => { 25 | const jsonDatosExtraidos = procesarResultados( rslt ) 26 | renderDatosExtraidos( jsonDatosExtraidos ) 27 | }) 28 | .catch( err => { console.log( err ) }) 29 | } 30 | 31 | // --- procesar con RegExp 32 | function procesarResultados( html ){ 33 | const jsonDatos = [] 34 | var matches = null 35 | 36 | // --- RegExp by jjyepez 37 | const expresionRegular = new RegExp( 38 | "(.*?)(?:.*?)fraction\">(.*?)?(?:.*?)discount(?:.*?)>(.*?\%)" 39 | , "g" ) 40 | 41 | do { 42 | // --- extrayendo datos 43 | matches = expresionRegular.exec( html ) 44 | jsonDatos.push( matches ) 45 | 46 | } 47 | while ( matches !== null ) 48 | return jsonDatos 49 | } 50 | 51 | function renderDatosExtraidos( datos ){ 52 | const $divResultados = document.getElementById('resultados') 53 | $divResultados.innerHTML = '' 54 | datos.forEach( reg => { 55 | if( reg ){ 56 | const $card = document.createElement('div') 57 | $card.classList.add('card') 58 | $card.addEventListener('click', ()=>{ 59 | window.open(reg[3]) 60 | }) 61 | 
$card.innerHTML = ` 62 |
63 | 64 |
65 |
${reg[1]}
66 |
67 |
${reg[4]}
68 |
-${reg[6]} OFF
69 |
70 |
$ ${reg[5]}
71 | ` 72 | $divResultados.appendChild( $card ) 73 | } 74 | }) 75 | } 76 | 77 | function actualizarFecha() { 78 | const hoy = new Date() 79 | const $fecha = document.getElementById('fecha') 80 | $fecha.innerHTML = `Actualizado al: ${('0'+hoy.getDate()).substr(-2)}/${('0'+(hoy.getMonth()+1)).substr(-2)}/${hoy.getFullYear()}` 81 | } -------------------------------------------------------------------------------- /styles.css: -------------------------------------------------------------------------------- 1 | body { 2 | display: flex; 3 | flex-direction: column; 4 | align-items: center; 5 | font-family: 'Montserrat', sans-serif; 6 | margin-bottom: 3rem; 7 | } 8 | h1 { 9 | color: #333; 10 | font-size: 1.5rem; 11 | margin: 3rem 0 .5rem; 12 | } 13 | #fecha { 14 | margin-bottom: 2rem; 15 | color: #999; 16 | } 17 | #resultados { 18 | width: 90vw; 19 | max-width: 1024px; 20 | min-height: 65vh; 21 | display: grid; 22 | grid-gap: 1rem; 23 | justify-content: center; 24 | align-content: center; 25 | } 26 | @media screen and (min-width: 601px) { 27 | #resultados { 28 | grid-template-columns: 24% 24% 24% 24%; 29 | } 30 | } 31 | @media screen and (max-width: 600px) { 32 | #resultados { 33 | grid-template-columns: 45% 45%; 34 | } 35 | } 36 | .card { 37 | font-size: 90%; 38 | transition: all .35s ease; 39 | border: 1px solid #eee; 40 | border-radius: .5rem; 41 | box-shadow: 0 2px 25px rgba(0,0,0,.15); 42 | justify-content: center; 43 | display: flex; 44 | flex-direction: column; 45 | align-items: center; 46 | cursor: pointer; 47 | } 48 | .card:hover{ 49 | box-shadow: 0 5px 50px rgba(0,0,0,.25);; 50 | } 51 | .card div { 52 | padding: 0 1rem; 53 | } 54 | .card img { 55 | margin: 1rem; 56 | margin-bottom: .5rem; 57 | } 58 | .card .titulo{ 59 | font-size: 100%; 60 | margin-bottom: .5rem; 61 | } 62 | .card .precio-ant { 63 | display: flex; 64 | } 65 | .card .precio { 66 | font-size: small; 67 | text-decoration: line-through; 68 | color: grey; 69 | } 70 | .card .dcto { 71 | white-space: 
nowrap; 72 | font-size: small; 73 | color: green; 74 | } 75 | .card .oferta { 76 | font-size: 150%; 77 | margin: .5rem 0; 78 | } 79 | #cargando { 80 | margin-top: -10vh; 81 | height:20vh; 82 | width: 400%; 83 | background: transparent url(https://loading.io/spinners/balls/lg.circle-slack-loading-icon.gif) no-repeat center center; 84 | background-size: contain; 85 | } 86 | .pie { 87 | font-size: .9rem; 88 | margin-top: 5rem; 89 | color: #999; 90 | } 91 | .by { 92 | font-size: .9rem; 93 | color: #ccc; 94 | } --------------------------------------------------------------------------------