├── insta.csv ├── fb.csv ├── twitter.csv ├── you.csv ├── crawlyoutube.sh ├── crawlinsta.sh ├── README.md ├── crawltwitter.sh ├── crawlfb.sh └── LICENSE /insta.csv: -------------------------------------------------------------------------------- 1 | Aurel,https://www.instagram.com/aurelie.hermansyah/ 2 | -------------------------------------------------------------------------------- /fb.csv: -------------------------------------------------------------------------------- 1 | ITS,https://www.facebook.com/InstitutTeknologiSepuluhNopember/ 2 | -------------------------------------------------------------------------------- /twitter.csv: -------------------------------------------------------------------------------- 1 | Aurel,https://twitter.com/aurelhermansyah 2 | Ashanti,https://twitter.com/ashantysiddik 3 | -------------------------------------------------------------------------------- /you.csv: -------------------------------------------------------------------------------- 1 | okezone,https://www.youtube.com/user/okezoneindonesia/videos 2 | RCTI,https://www.youtube.com/user/RCTIOfficialChannel/videos 3 | -------------------------------------------------------------------------------- /crawlyoutube.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | IFS="," 3 | while 4 | read f1 f2 5 | do 6 | nama=`echo $f1 | tr ' ' '-' | tr '\t' '-'` 7 | echo "id $nama" 8 | url=${f2%?} 9 | if [ "$url" != "-" ] 10 | then 11 | status=$(curl -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8)" -w "%{http_code}" -o temp -L --silent "$url") 12 | if [[ "$status" =~ "200" ]] 13 | then 14 | now=$(date +"%d-%m-%y-%H-%M") 15 | grep "yt-lockup-title" temp | grep -Eoh 'title=".*" a'| sed -r 's/^.{7}//' | sed -r 's/.{4}$//' > feed 16 | grep "yt-lockup-meta-info" temp | grep -Eoh "
(.*?)
' temp1 | sed -n '/^$/!{s/<[^>]*>//g;p;}' > feed 16 | grep -Eoh 'data-utime="[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]' temp1 | sed -r 's/^.{12}//' > tgl 17 | > tgl1 18 | while 19 | read line 20 | do 21 | date -d @$line +"%d-%m-%y %T" >> tgl1 22 | done < tgl 23 | baristgl=`wc -l < tgl` 24 | barisfeed=`wc -l < feed` 25 | echo $baristgl $barisfeed 26 | val=`expr $baristgl - $barisfeed` 27 | now=$(date +"%d-%m-%y-%H-%M") 28 | if [ "$val" -gt 0 ]; then 29 | v="1," 30 | var="${val}d" 31 | val=$v$var 32 | sed "$val" tgl1 > tgl 33 | paste tgl feed | tr '[:upper:]' '[:lower:]' > res/$nama-$now-f.tsv 34 | else 35 | paste tgl1 feed | tr '[:upper:]' '[:lower:]'> res/$nama-$now-f.tsv 36 | fi 37 | fi 38 | fi 39 | done < $1 40 | rm tgl1 temp1 temp tgl feed 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc.