├── magpie ├── AUTHORS ├── scripts │ ├── smarty_plugin │ │ └── modifier.rss_date_parse.php │ ├── magpie_simple.php │ ├── templates │ │ └── simple.smarty │ ├── README │ ├── magpie_slashbox.php │ ├── simple_smarty.php │ └── magpie_debug.php ├── CHANGES ├── README ├── NEWS ├── rss_utils.inc ├── cookbook ├── INSTALL ├── TROUBLESHOOTING ├── rss_cache.inc ├── htdocs │ ├── cookbook.html │ └── index.html ├── ChangeLog ├── rss_fetch.inc ├── rss_parse.inc └── extlib │ └── Snoopy.class.inc ├── rss_db ├── config.conf ├── logs └── test ├── help.php ├── classReloader.php ├── userFunctions.php ├── botLogger.php ├── handle_functions.php ├── config.php ├── modules.php ├── db_users.php ├── db_rssFeeds.php ├── DBFunctions.php ├── RSSFunctions.php └── bot.php /magpie/AUTHORS: -------------------------------------------------------------------------------- 1 | kellan 2 | -------------------------------------------------------------------------------- /rss_db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/torne/CrappyRSSBot/master/rss_db -------------------------------------------------------------------------------- /config.conf: -------------------------------------------------------------------------------- 1 | #comment 2 | [connection] 3 | nick=crappyBot 4 | server=irc.netgamers.org 5 | port=6667 6 | user=crappy rss irc bot 7 | realname=crappy 8 | [channels] 9 | chan1=#wearelegion 10 | [database] 11 | dbname 12 | dbuser 13 | dbpass -------------------------------------------------------------------------------- /logs/test: -------------------------------------------------------------------------------- 1 | some text 2 | some text 3 | some text 4 | some text 5 | some text 6 | 18/10/2009-21:58 - some text 7 | 20/10/2009-20:16 - some text 8 | 20/10/2009-20:16 - some text 9 | 20/10/2009-20:52 - some text 10 | 23/10/2009-22:08 - some text 11 | 23/10/2009-22:11 - some text 12 | 23/10/2009-22:14 - some text 13 | 
23/10/2009-22:18 - some text 14 | -------------------------------------------------------------------------------- /help.php: -------------------------------------------------------------------------------- 1 | _findClassByMethod( $bot, "_help_".$method ); 20 | if ( !$objectname ) 21 | return "No help for command $method."; 22 | 23 | 24 | //find the class that matches the method 25 | $object = new $objectname(); 26 | return call_user_func( array($object, "_help_".$method) ); 27 | } 28 | 29 | } 30 | 31 | ?> -------------------------------------------------------------------------------- /classReloader.php: -------------------------------------------------------------------------------- 1 | _loadMethodMap($bot); 38 | return "$filename reloaded."; 39 | } 40 | else 41 | { 42 | return "$filename failed to reload."; 43 | } 44 | } 45 | 46 | } 47 | 48 | ?> -------------------------------------------------------------------------------- /magpie/scripts/smarty_plugin/modifier.rss_date_parse.php: -------------------------------------------------------------------------------- 1 | 32 | -------------------------------------------------------------------------------- /magpie/scripts/magpie_simple.php: -------------------------------------------------------------------------------- 1 | channel['title'] . "

"; 11 | echo "

"; 18 | } 19 | ?> 20 | 21 |
22 | RSS URL:
23 | 24 |
25 | 26 |

27 |

Security Note:

28 | This is a simple example script. If this was a real script we probably wouldn't allow strangers to submit random URLs, and we certainly wouldn't simply echo anything passed in the URL. Additionally, it's a bad idea to leave this example script lying around. 29 |

-------------------------------------------------------------------------------- /userFunctions.php: -------------------------------------------------------------------------------- 1 | db = new db_users(); 13 | $this->db->_connectUsers(); 14 | } 15 | 16 | /** 17 | * 18 | * @param unknown_type $nick 19 | * @param unknown_type $password 20 | * @param unknown_type $email 21 | * 22 | */ 23 | public function register( $bot, $password, $email ) 24 | { 25 | 26 | if ( $this->db->_checkNickExists( $bot->_getNick() ) ) 27 | { 28 | echo $this->db->_getUserMessage(); 29 | } 30 | if ( $this->db->_checkEmailExists( $email ) ) 31 | { 32 | echo $this->db->_getUserMessage(); 33 | } 34 | } 35 | 36 | /** 37 | * 38 | * @param unknown_type $nick 39 | * @param unknown_type $password 40 | * 41 | */ 42 | public function login( $nick, $password ) 43 | { 44 | if ( !$this->db->_checkNickPassword( $nick, $password) ) 45 | { 46 | echo $this->db->_getUserMessage(); 47 | } 48 | } 49 | 50 | } 51 | 52 | ?> -------------------------------------------------------------------------------- /magpie/scripts/templates/simple.smarty: -------------------------------------------------------------------------------- 1 | 2 | 3 | A Simple RSS Box: I'm not a designer 4 | 5 | 6 | 7 |
8 | RSS File: 9 | 10 | 11 |
12 | 13 | Displaying: {$rss_url} 14 |

15 | 16 | {* if $error display the error 17 | elseif parsed RSS object display the RSS 18 | else solicit user for a URL 19 | *} 20 | 21 | {if $error } 22 | Error: {$error} 23 | {elseif $rss} 24 | 25 | 26 | 29 | 30 | {foreach from=$rss->items item=item} 31 | 32 | 35 | 38 | 39 | {/foreach} 40 |
27 | {$rss->channel.title} 28 |
33 | {$item.title} 34 | 36 | {$item.dc.date|rss_date_parse|date_format:"%A, %B %e, %Y"} 37 |
41 | {else} 42 | Enter the URL of an RSS file to display. 43 | {/if} 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /magpie/scripts/README: -------------------------------------------------------------------------------- 1 | Some examples of how to use Magpie: 2 | 3 | * magpie_simple.php * 4 | Simple example of fetching and parsing an RSS file. Expects to be 5 | called with a query param 'rss_url=http://' 6 | 7 | * simple_smarty.php * 8 | Similar to magpie_simple, but using the Smarty template engine to do 9 | display. Also demonstrates using rss_utils.inc and a smarty plugin to 10 | parse and display when each RSS item was published. 11 | 12 | * magpie_debug.php * 13 | Displays all the information available from a parsed feed. 14 | 15 | * smarty_plugin/modifier.rss_date_parse.php * 16 | 17 | A Smarty plugin for parsing RSS style dates. You must include rss_utils.inc 18 | for this plugin to work. It also must be installed in the Smarty plugin 19 | directory, see the Smarty docs for details. 20 | 21 | * templates/simple.smarty 22 | A Smarty template used by simple_smarty.php which demonstrates 23 | displaying an RSS feed and using the date parse plugin. 
24 | 25 | 26 | The Smarty template engine and documentation on how to use it are available from 27 | http://smarty.php.net 28 | -------------------------------------------------------------------------------- /magpie/CHANGES: -------------------------------------------------------------------------------- 1 | Version 0.72 2 | ----------- 3 | - fix security exploit: http://www.sec-consult.com/216.html 4 | 5 | Version 0.7 6 | ----------- 7 | - support for input and output charset encoding 8 | based on the work in FoF, uses iconv or mbstring if available 9 | - 10 | 11 | Version 0.6 12 | ----------- 13 | - basic support for Atom syndication format 14 | including support for Atom content constructs 15 | - fixed support for private feeds (HTTP Auth and SSL) 16 | (thanks to silverorange.com for providing test feeds) 17 | - support for some broken webservers 18 | 19 | Version 0.52 20 | ----------- 21 | - support GZIP content negoiation 22 | - PHP 4.3.2 support 23 | 24 | Version 0.4 25 | ----------- 26 | - improved error handling, better access for script authors 27 | - included example scripts of working with MagpieRSS 28 | - new Smarty plugin for RSS date parsing 29 | 30 | Version 0.3 31 | ----------- 32 | - added support for conditional gets (Last-Modified, ETag) 33 | - now use Snoopy to handle fetching RSS files 34 | 35 | Version 0.2 36 | ----------- 37 | - MAJOR CLEAN UP 38 | - removed kludgy $options array in favour of constants 39 | - phased out returning arrays 40 | - added better error handling 41 | - re-worked comments 42 | -------------------------------------------------------------------------------- /botLogger.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /magpie/README: -------------------------------------------------------------------------------- 1 | NAME 2 | 3 | MagpieRSS - a simple RSS integration tool 4 | 5 | SYNOPSIS 6 | 7 | 
require_once(rss_fetch.inc); 8 | $url = $_GET['url']; 9 | $rss = fetch_rss( $url ); 10 | 11 | echo "Channel Title: " . $rss->channel['title'] . "

"; 12 | echo "

"; 19 | 20 | DESCRIPTION 21 | 22 | MapieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", 23 | and simple to use. 24 | 25 | Some features include: 26 | 27 | * supports RSS 0.9 - 1.0, with limited RSS 2.0 support 28 | * supports namespaces, and modules, including mod_content and mod_event 29 | * open minded [1] 30 | * simple, functional interface, to object oriented backend parser 31 | * automatic caching of parsed RSS objects makes its easy to integrate 32 | * supports conditional GET with Last-Modified, and ETag 33 | * uses constants for easy override of default behaviour 34 | * heavily commented 35 | 36 | 37 | 1. By open minded I mean Magpie will accept any tag it finds in good faith that 38 | it was supposed to be here. For strict validation, look elsewhere. 39 | 40 | 41 | GETTING STARTED 42 | 43 | 44 | 45 | COPYRIGHT: 46 | Copyright(c) 2002 kellan@protest.net. All rights reserved. 47 | This software is released under the GNU General Public License. 48 | Please read the disclaimer at the top of the Snoopy.class.inc file. 49 | -------------------------------------------------------------------------------- /magpie/scripts/magpie_slashbox.php: -------------------------------------------------------------------------------- 1 | 9 | 10 | 12 | 13 |
14 | 15 |
16 | 17 | "; 21 | $rss = fetch_rss( $url ); 22 | echo slashbox ($rss); 23 | } 24 | 25 | echo "
";
26 | print_r($rss);
27 | echo "
"; 28 | ?> 29 | 30 | 31 | 32 | 33 | "; 40 | echo ""; 41 | 42 | # get the channel title and link properties off of the rss object 43 | # 44 | $title = $rss->channel['title']; 45 | $link = $rss->channel['link']; 46 | 47 | echo "$title"; 48 | echo ""; 49 | 50 | # foreach over each item in the array. 51 | # displaying simple links 52 | # 53 | # we could be doing all sorts of neat things with the dublin core 54 | # info, or the event info, or what not, but keeping it simple for now. 55 | # 56 | foreach ($rss->items as $item ) { 57 | echo ""; 58 | echo ""; 59 | echo $item['title']; 60 | echo ""; 61 | } 62 | 63 | echo ""; 64 | } 65 | 66 | ?> 67 | -------------------------------------------------------------------------------- /magpie/scripts/simple_smarty.php: -------------------------------------------------------------------------------- 1 | compile_check = true; 32 | 33 | // url of an rss file 34 | $url = $_GET['rss_url']; 35 | 36 | 37 | if ( $url ) { 38 | // assign a variable to smarty for use in the template 39 | $smarty->assign('rss_url', $url); 40 | 41 | // use MagpieRSS to fetch remote RSS file, and parse it 42 | $rss = fetch_rss( $url ); 43 | 44 | // if fetch_rss returned false, we encountered an error 45 | if ( !$rss ) { 46 | $smarty->assign( 'error', magpie_error() ); 47 | } 48 | $smarty->assign('rss', $rss ); 49 | 50 | $item = $rss->items[0]; 51 | $date = parse_w3cdtf( $item['dc']['date'] ); 52 | $smarty->assign( 'date', $date ); 53 | } 54 | 55 | // parse smarty template, and display using the variables we assigned 56 | $smarty->display('simple.smarty'); 57 | 58 | ?> 59 | -------------------------------------------------------------------------------- /magpie/NEWS: -------------------------------------------------------------------------------- 1 | MagpieRSS News 2 | 3 | MAGPIERSS 0.51 RELEASED 4 | * important bugfix! 
5 | * fix "silent failure" when PHP doesn't have zlib 6 | 7 | FEED ON FEEDS USES MAGPIE 8 | * web-based RSS aggregator built with Magpie 9 | * easy to install, easy to use. 10 | http://minutillo.com/steve/feedonfeeds/ 11 | 12 | MAGPIERSS 0.5 RELEASED 13 | * supports transparent HTTP gzip content negotiation for reduced bandwidth usage 14 | * quashed some undefined index notices 15 | 16 | MAGPIERSS 0.46 RELEASED 17 | * minor release, more error handling clean up 18 | * documentation fixes, simpler example 19 | * new troubleshooting guide for installation and usage problems 20 | http://magpierss.sourceforge.net/TROUBLESHOOTING 21 | 22 | MAGPIE NEWS AS RSS 23 | * releases, bug fixes, related stories in RSS 24 | 25 | MAGPIERSS COOKBOOK: SIMPLE PHP RSS HOW TOS 26 | * answers some of the most frequently asked Magpie questions 27 | * feedback, suggestions, requests, recipes welcome 28 | http://magpierss.sourceforge.net/cookbook.html 29 | 30 | MAGPIERSS 0.4 RELEASED! 31 | * improved error handling, more flexibility for script authors, backwards compatible 32 | * new and better examples! including using MagpieRSS and Smarty 33 | * new Smarty plugin for RSS date parsing 34 | http://smarty.php.net 35 | 36 | INFINITE PENGUIN NOW SUPPORTS MAGPIE 0.3 37 | * simple, sophisticated RSS viewer 38 | * includes auto-generated javascript ticker from RSS feed 39 | http://www.infinitepenguins.net/rss/ 40 | 41 | TRAUMWIND RELEASES REX BACKEND FOR MAGPIERSS 42 | * drop in support using regex based XML parser 43 | * parses improperly formed XML that chokes expat 44 | http://traumwind.de/blog/magpie/magpie_alike.php 45 | 46 | MAGPIERSS 0.3 RELEASED! 47 | * Support added for HTTP Conditional GETs. 48 | http://fishbowl.pastiche.org/archives/001132.html 49 | 50 | MAGPIERSS 0.2! 51 | * Major clean up of the code. Easier to use. 52 | * Simpler install on shared hosts. 53 | * Better documentation and comments. 
54 | -------------------------------------------------------------------------------- /magpie/scripts/magpie_debug.php: -------------------------------------------------------------------------------- 1 | Example Output"; 29 | echo "Channel: " . $rss->channel['title'] . "

"; 30 | echo "

"; 37 | } 38 | else { 39 | echo "Error: " . magpie_error(); 40 | } 41 | ?> 42 | 43 |
44 | RSS URL:
45 | 46 |
47 | 48 |

Parsed Results (var_dump'ed)

49 |
50 | 
51 | 
52 | 53 | Error: PHP compiled without XML support (--with-xml), Mapgie won't work without PHP support for XML.
\n"; 58 | exit; 59 | } 60 | else { 61 | echo "OK: Found an XML parser.
\n"; 62 | } 63 | 64 | if ( ! function_exists('gzinflate') ) { 65 | echo "Warning: PHP compiled without Zlib support (--with-zlib). No support for GZIP encoding.
\n"; 66 | } 67 | else { 68 | echo "OK: Support for GZIP encoding.
\n"; 69 | } 70 | 71 | if ( ! (function_exists('iconv') and function_exists('mb_convert_encoding') ) ) { 72 | echo "Warning: No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring)." . 73 | "No support character set munging.
\n"; 74 | } 75 | else { 76 | echo "OK: Support for character munging.
\n"; 77 | } 78 | } 79 | 80 | ?> 81 | -------------------------------------------------------------------------------- /magpie/rss_utils.inc: -------------------------------------------------------------------------------- 1 | 6 | * Version: 0.51 7 | * License: GPL 8 | * 9 | * The lastest version of MagpieRSS can be obtained from: 10 | * http://magpierss.sourceforge.net 11 | * 12 | * For questions, help, comments, discussion, etc., please join the 13 | * Magpie mailing list: 14 | * magpierss-general@lists.sourceforge.net 15 | */ 16 | 17 | 18 | /*======================================================================*\ 19 | Function: parse_w3cdtf 20 | Purpose: parse a W3CDTF date into unix epoch 21 | 22 | NOTE: http://www.w3.org/TR/NOTE-datetime 23 | \*======================================================================*/ 24 | 25 | function parse_w3cdtf ( $date_str ) { 26 | 27 | # regex to match wc3dtf 28 | $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/"; 29 | 30 | if ( preg_match( $pat, $date_str, $match ) ) { 31 | list( $year, $month, $day, $hours, $minutes, $seconds) = 32 | array( $match[1], $match[2], $match[3], $match[4], $match[5], $match[6]); 33 | 34 | # calc epoch for current date assuming GMT 35 | $epoch = gmmktime( $hours, $minutes, $seconds, $month, $day, $year); 36 | 37 | $offset = 0; 38 | if ( $match[10] == 'Z' ) { 39 | # zulu time, aka GMT 40 | } 41 | else { 42 | list( $tz_mod, $tz_hour, $tz_min ) = 43 | array( $match[8], $match[9], $match[10]); 44 | 45 | # zero out the variables 46 | if ( ! $tz_hour ) { $tz_hour = 0; } 47 | if ( ! $tz_min ) { $tz_min = 0; } 48 | 49 | $offset_secs = (($tz_hour*60)+$tz_min)*60; 50 | 51 | # is timezone ahead of GMT? 
then subtract offset 52 | # 53 | if ( $tz_mod == '+' ) { 54 | $offset_secs = $offset_secs * -1; 55 | } 56 | 57 | $offset = $offset_secs; 58 | } 59 | $epoch = $epoch + $offset; 60 | return $epoch; 61 | } 62 | else { 63 | return -1; 64 | } 65 | } 66 | 67 | ?> 68 | -------------------------------------------------------------------------------- /handle_functions.php: -------------------------------------------------------------------------------- 1 | _getData()); 21 | if( ! preg_match("/(.+)(?:!~|!)(.+)@(.+) PRIVMSG (.+) :(!|\.|\+|-)(.+)/", $bot->_getData(), $matches) ) 22 | { 23 | return; 24 | } 25 | 26 | $nick = $matches[1]; 27 | $user = $matches[2]; 28 | $hostmask = $matches[3]; 29 | $returnDest = $matches[4]; 30 | if( strcasecmp($returnDest, $bot->_getConfig('nick')) == 0 ) 31 | $returnDest = $nick; 32 | $messageType = $matches[5]; 33 | $message = $matches[6]; 34 | 35 | if( strcasecmp($hostmask, "403.be") && strcasecmp($hostmask, "Gabriel.users.netgamers.org") ) 36 | return; 37 | 38 | $bot->_setPrivmsg($nick, $user, $hostmask, $returnDest, $messageType, $message); 39 | 40 | if( preg_match("/reload (.+)/", $message, $matches) ) 41 | { 42 | $filename = $matches[1]; 43 | return "reload $returnDest $filename"; 44 | } 45 | 46 | $objectname = ''; 47 | $method = ''; 48 | $object = ''; 49 | 50 | $args = array(); 51 | 52 | $messageArray = explode(' ', $message); 53 | $method = $messageArray[0]; 54 | $args = array_slice($messageArray, 1); 55 | array_unshift($args, $bot); 56 | 57 | $modules = new modules(); 58 | $objectname = $modules->_findClassByMethod($bot, $method); 59 | if( ! 
$objectname || $message[0] == "_" ) 60 | { 61 | $bot->_sendMsg($returnDest, 'No such command.'); 62 | return; 63 | } 64 | 65 | $object = new $objectname(); 66 | $returnMsg = call_user_func_array(array($object , $method), $args); 67 | if( $returnMsg ) 68 | $bot->_sendMsg($returnDest, $returnMsg); 69 | $bot->_setPrivmsg(null, null, null, null, null, null); 70 | } 71 | 72 | /** 73 | * 74 | * @param $bot 75 | */ 76 | public function _handle_PING ($bot) 77 | { 78 | $explodedData = explode(" ", $bot->_getData()); 79 | $bot->_putToServer("PONG " . $explodedData[1] . "\r\n"); 80 | } 81 | 82 | /** 83 | * 84 | * @param unknown_type $bot 85 | */ 86 | public function _handle_254 ($bot) 87 | { 88 | $bot->_joinChans(); 89 | } 90 | 91 | /** 92 | * 93 | * @param unknown_type $bot 94 | */ 95 | public function _handle_433 ($bot) 96 | { 97 | 98 | } 99 | } 100 | 101 | ?> -------------------------------------------------------------------------------- /config.php: -------------------------------------------------------------------------------- 1 | curConfigFile = $this->defConfigFile; 32 | else 33 | $this->curConfigFile = $confFile; 34 | 35 | if ( !file_exists($this->curConfigFile) ) 36 | return 'File does not exist'; 37 | 38 | if ( !is_readable($this->curConfigFile) ) 39 | return 'File is not readable'; 40 | 41 | //get an array of lines of the config file 42 | $file = file($this->curConfigFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); 43 | 44 | //getting section outside of the loop to retain 45 | $section = ''; 46 | foreach ( $file as $line ) 47 | { 48 | //trim whitespace 49 | $line = trim($line); 50 | 51 | //if it's a comment 52 | if ( strcasecmp($line[0], $this->comment) == 0 ) 53 | { 54 | continue; 55 | } 56 | 57 | //it's a section header 58 | if ( preg_match("/\[(.*)\]/", $line, $matches) ) 59 | { 60 | $section = $matches[1]; 61 | continue; 62 | } 63 | 64 | //it's a name=value pair 65 | $namevalue = explode("=", $line); 66 | $name = trim($namevalue[0]); 67 | $value = 
trim($namevalue[1]); 68 | if ( strcasecmp($section, 'channels') == 0 ) 69 | { 70 | $this->_setConfig($value, $value, $section); 71 | } 72 | $this->_setConfig($name, $value); 73 | } 74 | } 75 | 76 | /** 77 | * 78 | * Set configuration for $name to $value 79 | * 80 | */ 81 | public function _setConfig ( $name, $value, $section=null ) 82 | { 83 | if ( $section ) 84 | $this->configuration[$section][$name] = $value; 85 | else 86 | $this->configuration[$name] = $value; 87 | } 88 | 89 | /** 90 | * 91 | * Get a configuration item 92 | * @return string 93 | * 94 | */ 95 | public function _getConfig ( $name, $section=null ) 96 | { 97 | if( $section ) 98 | return $this->configuration[$section][$name]; 99 | else 100 | return $this->configuration[$name]; 101 | } 102 | 103 | /** 104 | * 105 | */ 106 | public function _getChans() 107 | { 108 | return $this->configuration['channels']; 109 | } 110 | } 111 | ?> -------------------------------------------------------------------------------- /modules.php: -------------------------------------------------------------------------------- 1 | modules = array(); 20 | } 21 | 22 | /** 23 | * 24 | * @param unknown_type $method 25 | */ 26 | public function _findClassByMethod ($bot, $method) 27 | { 28 | var_dump($method); 29 | //var_dump($this->methodMap); 30 | $methodMap = $bot->_getMethodmap(); 31 | return $methodMap[$method]; 32 | } 33 | 34 | /** 35 | * 36 | */ 37 | public function _loadRequirements ($bot) 38 | { 39 | $modules = array(); 40 | $curDir = getcwd(); 41 | $dirList = scandir($curDir); 42 | $i = 0; 43 | foreach ($dirList as $file) 44 | { 45 | echo "$file\r\n"; 46 | if (preg_match("/(.*)\.php/", $file, $matches) && $file != "modules.php" && $file != "bot.php") 47 | { 48 | $modules[] = $matches[1]; 49 | require ($file); 50 | $i ++; 51 | } 52 | } 53 | $modules[] = get_class($this); 54 | $bot->_setModules($modules); 55 | $this->_loadMethodMap($bot); 56 | return $i; 57 | } 58 | 59 | /** 60 | * 61 | * @param unknown_type $modulename 62 
| */ 63 | public function _loadModule ($bot, $modulename) 64 | { 65 | $success = include ($modulename . ".php"); 66 | if ($success) 67 | { 68 | $modules = $bot->_getModules; 69 | $modules[] = $modulename; 70 | $bot->_setModules($modules); 71 | } 72 | return $success; 73 | } 74 | 75 | /** 76 | * 77 | * @param unknown_type $bot 78 | * 79 | */ 80 | public function _loadMethodMap ($bot) 81 | { 82 | $methodMap = array(); 83 | foreach ($bot->_getModules() as $module) 84 | { 85 | if (! class_exists($module)) 86 | continue; 87 | $classmethods = get_class_methods($module); 88 | foreach ($classmethods as $method) 89 | { 90 | if ($method[0] == "_" && $method[1] == "_") 91 | continue; 92 | if (array_key_exists($method, $methodMap)) 93 | { 94 | die("Multiple modules with the same method $module, " . $methodMap[$method] . ", $method\r\n"); 95 | } 96 | else 97 | { 98 | $methodMap[$method] = $module; 99 | } 100 | } 101 | } 102 | $bot->_setMethodmap($methodMap); 103 | return true; 104 | } 105 | 106 | /** 107 | * 108 | * @param unknown_type $bot 109 | * @return string 110 | * 111 | */ 112 | public function commands ($bot) 113 | { 114 | $publiccommands = array(); 115 | foreach ($bot->_getMethodmap() as $command => $module) 116 | { 117 | if ($command[0] != "_") 118 | $publiccommands[] = $command; 119 | } 120 | return "Commands available to you are " . 
implode(", ", $publiccommands); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /db_users.php: -------------------------------------------------------------------------------- 1 | db = new SQLite3($filename); 24 | 25 | if ( !$this->db ) 26 | { 27 | die($err); 28 | } 29 | 30 | return $this->db; 31 | } 32 | 33 | /** 34 | * 35 | */ 36 | function _getUserMessage() 37 | { 38 | return $this->message; 39 | } 40 | // 41 | // /** 42 | // * 43 | // */ 44 | // function _describeTable() 45 | // { 46 | // $result = $this->db->query("SELECT * FROM sqlite_master WHERE name = '$this->tablename'"); 47 | // //var_dump ( $result->fetchArray() ); 48 | // $result = $this->db->query("PRAGMA table_info($this->tablename)"); 49 | // while ( $row = $result->fetchArray() ) 50 | // print_r($row); 51 | // //var_dump( $this->db->arrayQuery("table_info($tablename)") ); 52 | // } 53 | 54 | // /** 55 | // * 56 | // */ 57 | // function _createTable() 58 | // { 59 | // $string = "drop table if exists $this->tablename"; 60 | // $this->db->exec($string); 61 | // $string = "create table $this->tablename(user_id INTEGER PRIMARY KEY ASC, nick varchar(256) unique, password varchar(256), email varchar(256), lasthost varchar(256))"; 62 | // $this->db->exec($string); 63 | // } 64 | 65 | /** 66 | * 67 | * @param unknown_type $nick 68 | */ 69 | function _checkNickExists( $nick ) 70 | { 71 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE nick=$nick"); 72 | if ( $result ) 73 | { 74 | $this->message = "Nick exists."; 75 | return true; 76 | } 77 | return false; 78 | } 79 | 80 | function _checkEmailExists( $email ) 81 | { 82 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE email=$email"); 83 | if ( $result ) 84 | { 85 | $this->message = "Email exists."; 86 | return true; 87 | } 88 | return false; 89 | } 90 | 91 | /** 92 | * 93 | * @param unknown_type $nick 94 | * @param unknown_type $password 95 | */ 96 | function 
_checkNickPassword( $nick, $password ) 97 | { 98 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE nick=$nick AND password=$password"); 99 | if ( !$result ) 100 | { 101 | $this->message = "Nick or password incorrect."; 102 | return false; 103 | } 104 | return true; 105 | } 106 | 107 | function _registerUser( $nick, $password, $email ) 108 | { 109 | if ( $this->_checkNickExists() ) 110 | { 111 | return false; 112 | } 113 | $result = $this->db->exec("INSERT INTO $this->tablename (url, title, lastTitle) VALUES ('$url', '$title', '$lastTitle')"); 114 | if ( !$result ) 115 | { 116 | $this->message = "Could not insert into table"; 117 | return false; 118 | } 119 | 120 | } 121 | } 122 | ?> -------------------------------------------------------------------------------- /magpie/cookbook: -------------------------------------------------------------------------------- 1 | MAGPIERSS RECIPES: Cooking with Corbies 2 | 3 | "Four and twenty blackbirds baked in a pie." 4 | 5 | 1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED. 6 | 7 | PROBLEM: 8 | 9 | You want to display the 10 (or 3) most recent headlines, but the RSS feed 10 | contains 15. 11 | 12 | SOLUTION: 13 | 14 | $num_items = 10; 15 | $rss = fetch_rss($url); 16 | 17 | $items = array_slice($rss->items, 0, $num_items); 18 | 19 | DISCUSSION: 20 | 21 | Rather then trying to limit the number of items Magpie parses, a much simpler, 22 | and more flexible approach is to take a "slice" of the array of items. And 23 | array_slice() is smart enough to do the right thing if the feed has less items 24 | then $num_items. 25 | 26 | See: http://www.php.net/array_slice 27 | 28 | 29 | 2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG 30 | 31 | PROBLEM: 32 | 33 | You don't want Magpie's error messages showing up if something goes wrong. 
34 | 35 | SOLUTION: 36 | 37 | # Magpie throws USER_WARNINGS only 38 | # so you can cloak these, by only showing ERRORs 39 | error_reporting(E_ERROR); 40 | 41 | # check the return value of fetch_rss() 42 | 43 | $rss = fetch_rss($url); 44 | 45 | if ( $rss ) { 46 | ...display rss feed... 47 | } 48 | else { 49 | echo "An error occured! " . 50 | "Consider donating more $$$ for restoration of services." . 51 | "
Error Message: " . magpie_error(); 52 | } 53 | 54 | DISCUSSION: 55 | 56 | MagpieRSS triggers a warning in a number of circumstances. The 2 most common 57 | circumstances are: if the specified RSS file isn't properly formed (usually 58 | because it includes illegal HTML), or if Magpie can't download the remote RSS 59 | file, and there is no cached version. 60 | 61 | If you don't want your users to see these warnings change your error_reporting 62 | settings to only display ERRORs. Another option is to turn off display_errors, 63 | so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. 64 | 65 | You can do this with: 66 | 67 | ini_set('display_errors', 0); 68 | 69 | See: http://www.php.net/error_reporting, 70 | http://www.php.net/ini_set, 71 | http://www.php.net/manual/en/ref.errorfunc.php 72 | 73 | 3. GENERATE A NEW RSS FEED 74 | 75 | PROBLEM: 76 | 77 | Create an RSS feed for other people to use. 78 | 79 | SOLUTION: 80 | 81 | Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/) 82 | 83 | DISCUSSION: 84 | 85 | An example of turning a Magpie parsed RSS object back into an RSS file is 86 | forthcoming. In the meantime RSSWriter has great documentation. 87 | 88 | 4. DISPLAY HEADLINES MORE RECENT THAN X DATE 89 | 90 | PROBLEM: 91 | 92 | You only want to display headlines that were published on, or after a certain 93 | date. 94 | 95 | 96 | SOLUTION: 97 | 98 | require 'rss_utils.inc'; 99 | 100 | # get all headlines published today 101 | $today = getdate(); 102 | 103 | # today, 12AM 104 | $date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); 105 | 106 | $rss = fetch_rss($url); 107 | 108 | foreach ( $rss->items as $item ) { 109 | $published = parse_w3cdtf($item['dc']['date']); 110 | if ( $published >= $date ) { 111 | echo "Title: " . $item['title']; 112 | echo "Published: " . date("h:i:s A", $published); 113 | echo "

"; 114 | } 115 | } 116 | 117 | DISCUSSION: 118 | 119 | This recipe only works for RSS 1.0 feeds that include the field. 120 | (which is very good RSS style) 121 | 122 | parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix 123 | epoch seconds. 124 | 125 | See: http://www.php.net/manual/en/ref.datetime.php 126 | -------------------------------------------------------------------------------- /db_rssFeeds.php: -------------------------------------------------------------------------------- 1 | db->close(); 20 | } 21 | 22 | /** 23 | * 24 | * @param unknown_type $filename 25 | */ 26 | function _connectRSS($filename='rss_db') 27 | { 28 | $this->filename = $filename; 29 | $err = ''; 30 | $this->db = new SQLite3($filename); 31 | 32 | if ( !$this->db ) 33 | { 34 | die($err); 35 | } 36 | 37 | return $this->db; 38 | } 39 | 40 | /** 41 | * 42 | */ 43 | function _getRSSMessage() 44 | { 45 | return $this->message; 46 | } 47 | 48 | // /** 49 | // * 50 | // */ 51 | // function _describeTable() 52 | // { 53 | // $result = $this->db->query("SELECT * FROM sqlite_master WHERE name = '$this->tablename'"); 54 | // //var_dump ( $result->fetchArray() ); 55 | // $result = $this->db->query("PRAGMA table_info($this->tablename)"); 56 | // while ( $row = $result->fetchArray() ) 57 | // print_r($row); 58 | // //var_dump( $this->db->arrayQuery("table_info($tablename)") ); 59 | // } 60 | 61 | // /** 62 | // * 63 | // */ 64 | // function _createTable() 65 | // { 66 | // $string = "create table $this->tablename(feedid INTEGER PRIMARY KEY ASC, url varchar(256), title varchar(256), lastTitle varchar(256))"; 67 | // $this->db->exec($string); 68 | // } 69 | 70 | /** 71 | * 72 | */ 73 | function _getFeeds() 74 | { 75 | $result = $this->db->query("SELECT * FROM $this->tablename"); 76 | $feeds = array(); 77 | while ( $feeds[] = $result->fetchArray() ); 78 | 79 | return $feeds; 80 | } 81 | 82 | /** 83 | * 84 | * @param unknown_type $url 85 | */ 86 | function _getIdForUrl( $url ) 
87 | { 88 | if ( empty($url) ) 89 | { 90 | echo "no url\r\n"; 91 | return; 92 | } 93 | $url = $this->db->escapeString($url); 94 | $result = $this->db->query("SELECT * FROM $this->tablename WHERE url='$url'"); 95 | if ( !$result ) 96 | { 97 | $this->message = 'No such feed is stored.'; 98 | return false; 99 | } 100 | $array = $result->fetchArray(); 101 | return $array['feedid']; 102 | } 103 | 104 | /** 105 | * 106 | * @param $feedid 107 | */ 108 | function _getFeedDetailsForFeedid( $feedid ) 109 | { 110 | if ( empty($feedid) ) 111 | { 112 | echo "no feedid\r\n"; 113 | return; 114 | } 115 | $feedid = $this->db->escapeString($feedid); 116 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE feedid=$feedid"); 117 | if ( !$result ) 118 | { 119 | $this->message = "No such feed id."; 120 | return false; 121 | } 122 | return $result->fetchArray(); 123 | } 124 | 125 | /** 126 | * 127 | * @param $url 128 | */ 129 | function _getFeedDetailsForURL( $url ) 130 | { 131 | if ( empty($url) ) 132 | { 133 | echo "no url\r\n"; 134 | return; 135 | } 136 | $feedid = $this->_getIdForUrl($url); 137 | 138 | if ( !$feedid ) 139 | return false; 140 | 141 | $result = $this->db->query("SELECT * FROM $this->tablename WHERE feedid=$feedid"); 142 | if ( !$result ) 143 | { 144 | $this->message = "No such feed id."; 145 | return false; 146 | } 147 | return $result->fetchArray(); 148 | } 149 | 150 | 151 | 152 | 153 | 154 | 155 | /** 156 | * 157 | * @param $feedid 158 | * @param $lastTitle 159 | */ 160 | function _updateLastForFeed( $feedid, $lastTitle ) 161 | { 162 | if ( empty($feedid) || empty($lastTitle) ) 163 | { 164 | echo 'No feedid or lastTitle\r\n'; 165 | return; 166 | } 167 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE feedid=$feedid"); 168 | if ( !$result ) 169 | { 170 | $this->message = "No such feed id."; 171 | return false; 172 | } 173 | 174 | $lastTitle = $this->db->escapeString($lastTitle); 175 | $query = "UPDATE $this->tablename SET 
lastTitle = '$lastTitle' WHERE feedid = $feedid"; 176 | $success = $this->db->exec($query); 177 | return $success; 178 | } 179 | 180 | /** 181 | * 182 | * @param unknown_type $url 183 | * @param unknown_type $title 184 | * @param unknown_type $lastTitle 185 | */ 186 | function _addFeed( $url, $title, $lastTitle ) 187 | { 188 | if ( empty($url) || empty($lastTitle) || empty($title) ) 189 | { 190 | echo 'No url or lastTitle or title\r\n'; 191 | return; 192 | } 193 | $url = $this->db->escapeString($url); 194 | $title = $this->db->escapeString($title); 195 | $lastTitle = $this->db->escapeString($lastTitle); 196 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE url='$url'"); 197 | if ( $result ) 198 | { 199 | $this->message = "That URL is already stored."; 200 | return false; 201 | } 202 | 203 | if ( !$result ) 204 | { 205 | $result = $this->db->exec("INSERT INTO $this->tablename (url, title, lastTitle) VALUES ('$url', '$title', '$lastTitle')"); 206 | if ( !$result ) 207 | { 208 | $this->message = "Could not insert into table"; 209 | return false; 210 | } 211 | return $this->db->lastInsertRowID(); 212 | } 213 | 214 | } 215 | } 216 | ?> -------------------------------------------------------------------------------- /magpie/INSTALL: -------------------------------------------------------------------------------- 1 | REQUIREMENTS 2 | 3 | MapieRSS requires a recent PHP 4+ (developed with 4.2.0) 4 | with xml (expat) support. 5 | 6 | Optionally: 7 | * PHP5 with libxml2 support. 8 | * cURL for SSL support 9 | * iconv (preferred) or mb_string for expanded character set support 10 | 11 | QUICK START 12 | 13 | Magpie consists of 4 files (rss_fetch.inc, rss_parser.inc, rss_cache.inc, 14 | and rss_utils.inc), and the directory extlib (which contains a modified 15 | version of the Snoopy HTTP client) 16 | 17 | Copy these 5 resources to a directory named 'magpierss' in the same 18 | directory as your PHP script. 
19 | 20 | At the top of your script add the following line: 21 | 22 | require_once('magpierss/rss_fetch.inc'); 23 | 24 | Now you can use the fetch_rss() method: 25 | 26 | $rss = fetch_rss($url); 27 | 28 | Done. That's it. See README for more details on using MagpieRSS. 29 | 30 | NEXT STEPS 31 | 32 | Important: you'll probably want to get the cache directory working in 33 | order to speed up your application, and not abuse the webserver you're 34 | downloading the RSS from. 35 | 36 | Optionally you can install MagpieRSS in your PHP include path in order to 37 | make it available server wide. 38 | 39 | Lastly you might want to look through the constants in rss_fetch.inc see if 40 | there is anything you want to override (the defaults are pretty good) 41 | 42 | For more info, or if you have trouble, see TROUBLESHOOTING 43 | 44 | SETTING UP CACHING 45 | 46 | Magpie has built-in transparent caching. With caching Magpie will only 47 | fetch and parse RSS feeds when there is new content. Without this feature 48 | your pages will be slow, and the sites serving the RSS feed will be annoyed 49 | with you. 50 | 51 | ** Simple and Automatic ** 52 | 53 | By default Magpie will try to create a cache directory named 'cache' in the 54 | same directory as your PHP script. 55 | 56 | ** Creating a Local Cache Directory ** 57 | 58 | Often this will fail, because your webserver doesn't have sufficient 59 | permissions to create the directory. 60 | 61 | Exact instructions for how to do this will vary from install to install and 62 | platform to platform. The steps are: 63 | 64 | 1. Make a directory named 'cache' 65 | 2. Give the web server write access to that directory. 66 | 67 | An example of how to do this on Debian would be: 68 | 69 | 1. mkdir /path/to/script/cache 70 | 2. chgrp www-data /path/to/script/cache 71 | 3. chmod 775 /path/to/script/cache 72 | 73 | On other Unixes you'll need to change 'www-data' to what ever user Apache 74 | runs as. 
(on MacOS X the user would be 'www') 75 | 76 | ** Cache in /tmp ** 77 | 78 | Sometimes you won't be able to create a local cache directory. Some reasons 79 | might be: 80 | 81 | 1. No shell account 82 | 2. Insufficient permissions to change ownership of a directory 83 | 3. Webserver runs as 'nobody' 84 | 85 | In these situations using a cache directory in /tmp can often be a good 86 | option. 87 | 88 | The drawback is /tmp is public, so anyone on the box can read the cache 89 | files. Usually RSS feeds are public information, so you'll have to decide 90 | how much of an issue that is. 91 | 92 | To use /tmp as your cache directory you need to add the following line to 93 | your script: 94 | 95 | define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache'); 96 | 97 | ** Global Cache ** 98 | 99 | If you have several applications using Magpie, you can create a single 100 | shared cache directory, either using the /tmp cache, or somewhere else on 101 | the system. 102 | 103 | The upside is that you'll distribute fetching and parsing feeds across 104 | several applications. 105 | 106 | INSTALLING MAGPIE SERVER WIDE 107 | 108 | Rather than following the Quick Start instructions, which require you to have 109 | a copy of Magpie per application, you can instead place it in some 110 | shared location. 111 | 112 | ** Adding Magpie to Your Include Path ** 113 | 114 | Copy the 5 resources (rss_fetch.inc, rss_parse.inc, rss_cache.inc, 115 | rss_utils.inc, and extlib) to a directory named 'magpierss' in your include 116 | path. Now any PHP file on your system can use Magpie with: 117 | 118 | require_once('magpierss/rss_fetch.inc'); 119 | 120 | Different installs have different include paths, and you'll have to figure 121 | out what your include_path is.
122 | 123 | From shell you can try: 124 | 125 | php -i | grep 'include_path' 126 | 127 | Alternatively you can create a phpinfo.php file which contains: 128 | 129 | <?php phpinfo(); ?> 130 | 131 | Debian's default is: 132 | 133 | /usr/share/php 134 | 135 | (though a more ideologically pure location would be /usr/local/share/php) 136 | 137 | Apple's default include path is: 138 | 139 | /usr/lib/php 140 | 141 | While the Entropy PHP build seems to use: 142 | 143 | /usr/local/php/lib/php -------------------------------------------------------------------------------- /magpie/TROUBLESHOOTING: -------------------------------------------------------------------------------- 1 | TROUBLESHOOTING 2 | 3 | 4 | Trouble Installing MagpieRSS: 5 | 6 | 1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' 7 | (include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') 8 | 9 | 2. Cache couldn't make dir './cache'. 10 | 11 | 3. Fatal error: Failed to load PHP's XML Extension. 12 | http://www.php.net/manual/en/ref.xml.php 13 | 14 | Trouble Using MagpieRSS 15 | 16 | 4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf. 17 | (HTTP Error: Invalid protocol "") 18 | 19 | 5. Warning: MagpieRSS: Failed to parse RSS file. 20 | (not well-formed (invalid token) at line 19, column 98) 21 | 22 | 6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss. 23 | (HTTP Response: HTTP/1.1 404 Not Found) 24 | 25 | If you would rather provide a custom error, see the COOKBOOK 26 | (http://magpierss.sf.net/cookbook.html) recipe 2. 27 | 28 | ************************************************************************* 29 | 1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' 30 | (include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') 31 | 32 | This could mean that: 33 | 34 | a) PHP can't find the MagpieRSS files. 35 | b) PHP found the MagpieRSS files, but can't read them. 36 | 37 | a. Telling PHP where to look for MagpieRSS files.
38 | 39 | This might mean your PHP program can't find the MagpieRSS libraries. 40 | Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc, 41 | rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the 42 | cookbook for exceptions). 43 | 44 | This can be fixed by making sure the MagpieRSS files are in your include 45 | path. 46 | 47 | If you can't edit your include path (for example you're on a shared host) then 48 | you need to replace: 49 | 50 | require_once('rss_fetch.inc'); 51 | 52 | -with- 53 | 54 | define('MAGPIE_DIR', '/path/to/magpierss/'); 55 | require_once(MAGPIE_DIR.'rss_fetch.inc'); 56 | 57 | b. PHP can't read the MagpieRSS files 58 | 59 | All PHP libraries need to be readable by your webserver. 60 | 61 | On Unix you can accomplish this with: 62 | 63 | chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc 64 | 65 | ************************************************************************* 66 | 2. Cache couldn't make dir './cache'. 67 | 68 | MagpieRSS caches the results of fetched and parsed RSS to reduce the load on 69 | both your server, and the remote server providing the RSS. It does this by 70 | writing files to a cache directory. 71 | 72 | This error means the webserver doesn't have write access to the current 73 | directory. 74 | 75 | a. Make a webserver writeable cache directory 76 | 77 | Find the webserver's group. (on my system it is 'www') 78 | 79 | mkdir ./cache 80 | chgrp www ./cache 81 | chmod g+w ./cache 82 | 83 | (this is the best, and desired solution) 84 | 85 | b. Tell MagpieRSS to create the cache directory somewhere the webserver can 86 | write to. 87 | 88 | define('MAGPIE_CACHE_DIR', '/tmp/magpierss'); 89 | 90 | (this is not a great solution, and might have security considerations) 91 | 92 | c. Turn off caching. 93 | 94 | Magpie can work fine without caching, but it will be slower, and you might 95 | become a nuisance to the RSS provider, but it is an option.
96 | 97 | define('MAGPIE_CACHE_ON', 0); 98 | 99 | d. And lastly, do NOT 100 | 101 | chmod 777 ./cache 102 | 103 | Any of the above solutions are better than this. 104 | 105 | NOTE: If none of this works for you, let me know. I've got root, and a 106 | custom compiled Apache on almost any box I ever touch, so I can be a little 107 | out of touch with reality. But I won't know that if I don't get feedback. 108 | 109 | ************************************************************************* 110 | 3. Fatal error: Failed to load PHP's XML Extension. 111 | http://www.php.net/manual/en/ref.xml.php 112 | 113 | -or- 114 | 115 | Fatal error: Failed to create an instance of PHP's XML parser. 116 | http://www.php.net/manual/en/ref.xml.php 117 | 118 | Make sure your PHP was built with --with-xml 119 | 120 | This has been turned on by default for several versions of PHP, but it might 121 | be turned off in your build. 122 | 123 | See php.net for details on building and configuring PHP. 124 | 125 | 126 | ************************************************************************* 127 | 4. Warning: MagpieRSS: Failed to fetch index.rdf. 128 | (HTTP Error: Invalid protocol "") 129 | 130 | You need to put http:// in front of the URL to your RSS feed. 131 | 132 | ************************************************************************* 133 | 5. Warning: MagpieRSS: Failed to parse RSS file. 134 | (not well-formed (invalid token) at line 19, column 98) 135 | 136 | There is a problem with the RSS feed you are trying to read. 137 | MagpieRSS is an XML parser, and therefore can't parse RSS feeds with invalid 138 | characters. Some RSS parsers are based on regular expressions, and can 139 | parse invalid RSS, but they have their own problems.
140 | 141 | You could try contacting the author of the RSS feed, and pointing them to 142 | the online RSS validator at: 143 | 144 | http://feeds.archive.org/validator/ 145 | 146 | ************************************************************************* 147 | 6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf 148 | (HTTP Response: HTTP/1.1 404 Not Found) 149 | 150 | Its a 404! The RSS file ain't there. 151 | 152 | 153 | -------------------------------------------------------------------------------- /DBFunctions.php: -------------------------------------------------------------------------------- 1 | _connect(); 4 | //$dbtest->_createTable(); 5 | //$dbtest->_describeTable(); 6 | //$dbtest->_addFeed( 'url', 'title', 'lastTitle'); 7 | echo $dbtest->_getIdForUrl('url'); 8 | echo $dbtest->_getIdForUrl('lol'); 9 | class DBFunctions 10 | { 11 | private $db; 12 | private $message=''; 13 | private $tablename='rss_table'; 14 | 15 | /** 16 | * 17 | */ 18 | function __construct() 19 | { 20 | 21 | } 22 | 23 | /** 24 | * 25 | * @param unknown_type $filename 26 | */ 27 | function _connect($filename='rss_db') 28 | { 29 | $err = ''; 30 | $this->db = new SQLite3($filename); 31 | 32 | if ( !$this->db ) 33 | { 34 | die($err); 35 | } 36 | 37 | return $this->db; 38 | } 39 | 40 | /** 41 | * 42 | */ 43 | function _getMessage() 44 | { 45 | return $this->message; 46 | } 47 | 48 | /** 49 | * 50 | */ 51 | function _describeTable() 52 | { 53 | $result = $this->db->query("SELECT * FROM sqlite_master WHERE name = '$this->tablename'"); 54 | //var_dump ( $result->fetchArray() ); 55 | $result = $this->db->query("PRAGMA table_info($this->tablename)"); 56 | while ( $row = $result->fetchArray() ) 57 | print_r($row); 58 | //var_dump( $this->db->arrayQuery("table_info($tablename)") ); 59 | } 60 | 61 | /** 62 | * 63 | */ 64 | function _createTable() 65 | { 66 | $string = "create table $this->tablename(feedid INTEGER PRIMARY KEY ASC, url varchar(256), title varchar(256), lastTitle 
varchar(256))"; 67 | $this->db->exec($string); 68 | } 69 | 70 | /** 71 | * 72 | */ 73 | function _getFeeds() 74 | { 75 | $result = $this->db->query("SELECT * FROM $this->tablename"); 76 | $feeds = array(); 77 | while ( $feeds[] = $result->fetchArray() ); 78 | return $feeds; 79 | } 80 | 81 | /** 82 | * 83 | * @param unknown_type $url 84 | */ 85 | function _getIdForUrl( $url ) 86 | { 87 | $url = $this->db->escapeString($url); 88 | $result = $this->db->query("SELECT * FROM $this->tablename WHERE url='$url'"); 89 | if ( !$result ) 90 | { 91 | $this->message = 'No such feed is stored.'; 92 | return false; 93 | } 94 | $array = $result->fetchArray(); 95 | return $array['feedid']; 96 | } 97 | 98 | /** 99 | * 100 | * @param $feedid 101 | */ 102 | function _getFeedDetailsForFeedid( $feedid ) 103 | { 104 | $feedid = $this->db->escapeString($feedid); 105 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE feedid=$feedid"); 106 | if ( !$result ) 107 | { 108 | $this->message = "No such feed id."; 109 | return false; 110 | } 111 | return $result->fetchArray(); 112 | } 113 | 114 | /** 115 | * 116 | * @param $url 117 | */ 118 | function _getFeedDetailsForURL( $url ) 119 | { 120 | $feedid = $this->_getIdForUrl($url); 121 | 122 | if ( !$feedid ) 123 | return false; 124 | 125 | $result = $this->db->query("SELECT * FROM $this->tablename WHERE feedid=$feedid"); 126 | if ( !$result ) 127 | { 128 | $this->message = "No such feed id."; 129 | return false; 130 | } 131 | return $result->fetchArray(); 132 | } 133 | 134 | /** 135 | * 136 | * @param $feedid 137 | * @param $lastTitle 138 | */ 139 | function _updateLastForFeed( $feedid, $lastTitle ) 140 | { 141 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE feedid=$feedid"); 142 | if ( !$result ) 143 | { 144 | $this->message = "No such feed id."; 145 | return false; 146 | } 147 | 148 | $lastTitle = $this->db->escapeString($lastTitle); 149 | $query = "UPDATE $this->tablename SET lastTitle = 
'$lastTitle' WHERE feedid = $feedid"; 150 | var_dump($query); 151 | $success = $this->db->exec($query); 152 | return $success; 153 | } 154 | 155 | /** 156 | * 157 | * @param unknown_type $url 158 | * @param unknown_type $title 159 | * @param unknown_type $lastTitle 160 | */ 161 | function _addFeed( $url, $title, $lastTitle ) 162 | { 163 | $url = $this->db->escapeString($url); 164 | $title = $this->db->escapeString($title); 165 | $lastTitle = $this->db->escapeString($lastTitle); 166 | $result = $this->db->querySingle("SELECT * FROM $this->tablename WHERE url='$url'"); 167 | if ( $result ) 168 | { 169 | $this->message = "That URL is already stored."; 170 | return false; 171 | } 172 | 173 | if ( !$result ) 174 | { 175 | $result = $this->db->exec("INSERT INTO $this->tablename (url, title, lastTitle) VALUES ('$url', '$title', '$lastTitle')"); 176 | if ( !$result ) 177 | { 178 | $this->message = "Could not insert into table"; 179 | return false; 180 | } 181 | return $this->db->lastInsertRowID(); 182 | } 183 | 184 | } 185 | 186 | // $filename = $bot->_getConfig()->_getConfig( "dbname", "database"); 187 | // $query = $db->query("SELECT name FROM sqlite_master WHERE name = 'tablename'"); 188 | // if ( $query->numRows() ) 189 | // echo "table exists\r\n"; 190 | // else 191 | // echo "table does not exist\r\n"; 192 | // $query = $db->query("SELECT name FROM sqlite_master WHERE name = 'tabelname'"); 193 | // if ( $query->numRows() ) 194 | // echo "tables exists\r\n"; 195 | // else 196 | // echo "table does not exist\r\n"; 197 | // $q = @$db->query('SELECT requests FROM tablename WHERE id = 1'); 198 | // if ($q === false) { 199 | // $db->queryExec('CREATE TABLE tablename (id int, requests int, PRIMARY KEY (id)); INSERT INTO tablename VALUES (1,1)'); 200 | // $hits = 1; 201 | // } else { 202 | // $result = $q->fetchSingle(); 203 | // $hits = $result+1; 204 | // } 205 | // $db->queryExec("UPDATE tablename SET requests = '$hits' WHERE id = 1"); 206 | // } 207 | } 208 | 209 | ?> 
-------------------------------------------------------------------------------- /RSSFunctions.php: -------------------------------------------------------------------------------- 1 | db = new db_rssFeeds(); 18 | $this->db->_connectRSS(); 19 | } 20 | 21 | function __destruct() 22 | { 23 | echo "destruction of RSSFunctions\r\n"; 24 | } 25 | 26 | 27 | /** 28 | * @param unknown_type $bot 29 | */ 30 | public function _checkForUpdates ($bot) 31 | { 32 | echo "checking for updates\r\n"; 33 | $this->_getCurFeeds($bot); 34 | } 35 | 36 | /** 37 | * 38 | * @param unknown_type $url 39 | */ 40 | public function _getMainTitle ($url) 41 | { 42 | if ( empty($url) ) 43 | { 44 | echo "url is empty\r\n"; 45 | return; 46 | } 47 | $rss = fetch_rss($url); 48 | return $rss->channel['title']; 49 | } 50 | 51 | /** 52 | * Fetch the feed at $url, store its newest item title as lastTitle, and send one message per item that precedes the previously stored lastTitle to every configured channel. 53 | * @param unknown_type $bot 54 | * @param unknown_type $url 55 | */ 56 | public function _getItemsUntilPrevTitle ($bot, $url) 57 | { 58 | if ( empty($bot) || empty($url) ) 59 | { 60 | echo "bot or url is empty\r\n"; 61 | return; 62 | } 63 | $details = $this->db->_getFeedDetailsForURL($url); 64 | $rss = fetch_rss($url); 65 | 66 | if ( !isset($rss->items) || !is_array($rss->items) ) 67 | { 68 | echo "Something wrong with feed, rss->items is nothing\r\n"; 69 | return; 70 | } 71 | 72 | if ($rss->items[0]['title'] == $details['lastTitle']) 73 | { 74 | return; 75 | } 76 | 77 | $success = $this->db->_updateLastForFeed($details['feedid'], $rss->items[0]['title']); 78 | if ( !$success ) 79 | { 80 | echo $this->db->_getRSSMessage()."\r\n"; 81 | return; /* was `break`, which is a fatal error outside a loop; stop here because the new lastTitle could not be saved */ 82 | } 83 | $messageArray = array(); 84 | foreach ($rss->items as $item) 85 | { 86 | extract($item); 87 | if ($item['title'] == $details['lastTitle']) 88 | { 89 | break; 90 | } 91 | 92 | $combine = ''; 93 | if ( isset($description) ) 94 | { 95 | $combine = $description; 96 | } 97 | else if ( isset($atom_content) ) 98 | { 99 | $combine = $atom_content; 100 | } 101 | if ( $combine ) 102 | { 103 | $split =
preg_split("/[\f\n\r\t\v]+/", strip_tags($combine), null, PREG_SPLIT_NO_EMPTY); 104 | $combine = " - ".implode(", ", $split); 105 | 106 | if (strlen($combine) >= 100) 107 | { 108 | $combine = substr($combine, 0, 99) . "..."; 109 | } 110 | 111 | } 112 | $messageArray[] = $details['title'] . " - $title - $link$combine"; 113 | } 114 | foreach ( $messageArray as $message ) 115 | { 116 | foreach ($bot->_getConfig()->_getChans() as $channel) 117 | { 118 | $bot->_sendMsg( $channel, $message); 119 | } 120 | } 121 | return null; 122 | } 123 | 124 | /** 125 | * 126 | * @param unknown_type $url 127 | */ 128 | public function _getFeed ($url) 129 | { 130 | if ( empty($url) ) 131 | { 132 | echo "url is empty\r\n"; 133 | return; 134 | } 135 | $rss = simplexml_load_file($url); 136 | $names = $rss->getNamespaces(); 137 | $titles = $rss->xpath('//title'); 138 | if ($titles[0]) 139 | { 140 | echo "Title-main1: " . $titles[0] . "\r\n"; 141 | } 142 | else 143 | { 144 | if ($names) 145 | { 146 | echo $names[""] . "\r\n"; 147 | $children = $rss->children($names[""]); 148 | $title = $children->title; 149 | echo "Title-main2: " . $title . "\r\n"; 150 | } 151 | } 152 | 153 | $rss = fetch_rss($url); 154 | foreach ($rss->items as $item) 155 | { 156 | echo "Title: " . $item['title'] . 
"\r\n"; 157 | } 158 | 159 | } 160 | 161 | /** 162 | * 163 | * @param unknown_type $url 164 | */ 165 | public function _getLastFeedItem ($url) 166 | { 167 | if ( empty($url) ) 168 | { 169 | echo "url is empty\r\n"; 170 | return; 171 | } 172 | $rss = fetch_rss($url); 173 | return $rss->items[0]['title']; 174 | } 175 | 176 | /** 177 | * 178 | * @param unknown_type $url 179 | */ 180 | public function _checkFeedHeader ($url) 181 | { 182 | var_dump(get_headers($url)); 183 | } 184 | 185 | /** 186 | * 187 | * @param unknown_type $bot 188 | */ 189 | public function _getCurFeeds ($bot) 190 | { 191 | if ( empty($bot) ) 192 | { 193 | echo "bot is empty\r\n"; 194 | return; 195 | } 196 | foreach ($this->db->_getFeeds() as $feed) 197 | { 198 | if ( !$feed['url'] || empty($feed['url']) ) 199 | { 200 | continue; 201 | } 202 | $this->_getItemsUntilPrevTitle($bot, $feed['url']); 203 | } 204 | } 205 | 206 | /** 207 | * 208 | * @param unknown_type $bot 209 | */ 210 | public function listFeeds ( $bot ) 211 | { 212 | if ( empty($bot) ) 213 | { 214 | echo "bot is empty\r\n"; 215 | return; 216 | } 217 | $feedfun = array(); 218 | $feedfun[] = "Currently stored feeds: Title - URL - Last Entry Title"; 219 | foreach ($this->db->_getFeeds() as $feed) 220 | { 221 | if ( !empty($feed['title']) ) 222 | { 223 | $feedfun[] = $feed['title'] . " - " . $feed['url'] . " - " . $feed['lastTitle']; 224 | } 225 | } 226 | foreach ( $feedfun as $feed ) 227 | { 228 | $bot->_sendMsg( $bot->_getReturnDest(), $feed); 229 | } 230 | return null; 231 | } 232 | 233 | /** 234 | * 235 | * @param unknown_type $bot 236 | * @param unknown_type $url 237 | */ 238 | public function addFeed ($bot, $url) 239 | { 240 | if ( empty($bot) || empty($url) ) 241 | { 242 | echo "bot or url is empty\r\n"; 243 | return; 244 | } 245 | $title = $this->_getMainTitle($url); 246 | $lastTitle = $this->_getLastFeedItem($url); 247 | $rowID = $this->db->_addFeed($url, $title, $lastTitle); 248 | if (! 
$rowID) 249 | { 250 | return $this->db->_getRSSMessage(); 251 | } 252 | return $rowID; 253 | } 254 | 255 | /** 256 | * 257 | */ 258 | public function remFeed ( $bot, $url ) 259 | { 260 | return "I don't work, but if I did I'd remove a feed"; 261 | } 262 | 263 | /** 264 | * 265 | */ 266 | public function _getFeeders () 267 | { 268 | 269 | } 270 | 271 | /** 272 | * 273 | */ 274 | public function listFeeders () 275 | { 276 | return "I don't work, but if I did I'd list feed admins"; 277 | } 278 | 279 | /** 280 | * 281 | */ 282 | public function addFeeder () 283 | { 284 | return "I don't work, but if I did I'd add a feed admin"; 285 | } 286 | 287 | /** 288 | * 289 | */ 290 | public function remFeeder () 291 | { 292 | return "I don't work, but if I did I'd remove a feed admin"; 293 | } 294 | 295 | /** 296 | * 297 | * @param unknown_type $bot 298 | */ 299 | public function _checkFeederAccess( $bot ) 300 | { 301 | 302 | } 303 | } 304 | 305 | ?> -------------------------------------------------------------------------------- /bot.php: -------------------------------------------------------------------------------- 1 | countLoadedModules." modules. Loaded modules are: ".$bot->listLoadedModules()."\r\n"; 5 | //echo $bot ->doLoggerStuff('log', array('test', 'some text'))."\r\n"; 6 | $bot->_main(); 7 | 8 | /** 9 | * 10 | * @author gabriel 11 | * 12 | */ 13 | class bot 14 | { 15 | 16 | private $config; 17 | private $logger; 18 | private $socket; 19 | private $data; 20 | private $handle_functions; 21 | private $rss_time; 22 | private $modules; 23 | private $methodMap; 24 | private $nick; 25 | private $user; 26 | private $hostmask; 27 | private $returnDest; 28 | private $messageType; 29 | private $message; 30 | 31 | /** 32 | * 33 | * 34 | * 35 | */ 36 | function __construct () 37 | { 38 | //set_include_path(get_include_path() . PATH_SEPARATOR . 
'/Users/gabriel/Zend/workspaces/DefaultWorkspace7/Crappy RSS Bot/CrappyRSSBot'); 39 | include ('modules.php'); 40 | $this->modules = new modules(); 41 | $this->modules->_loadRequirements($this); 42 | $this->_initialise(); 43 | } 44 | 45 | /** 46 | * 47 | */ 48 | public function _initialise () 49 | { 50 | $this->config = new config(); 51 | $this->config->_loadConfig(); 52 | $this->logger = new botLogger(); 53 | $this->handle_functions = new handle_functions(); 54 | $this->rss_time = time(); 55 | } 56 | 57 | /** 58 | * 59 | * @param $nick 60 | * @param $user 61 | * @param $hostmask 62 | * @param $returnDest 63 | * @param $messageType 64 | * @param $message 65 | */ 66 | function _setPrivmsg( $nick, $user, $hostmask, $returnDest, $messageType, $message ) 67 | { 68 | $this->nick = $nick; 69 | $this->user = $user; 70 | $this->hostmask = $hostmask; 71 | $this->returnDest = $returnDest; 72 | $this->messageType = $messageType; 73 | $this->message = $message; 74 | } 75 | /** 76 | * @return the $message 77 | */ 78 | public function _getMessage () 79 | { 80 | return $this->message; 81 | } 82 | 83 | /** 84 | * @return the $messageType 85 | */ 86 | public function _getMessageType () 87 | { 88 | return $this->messageType; 89 | } 90 | 91 | /** 92 | * @return the $returnDest 93 | */ 94 | public function _getReturnDest () 95 | { 96 | return $this->returnDest; 97 | } 98 | 99 | /** 100 | * @return the $hostmask 101 | */ 102 | public function _getHostmask () 103 | { 104 | return $this->hostmask; 105 | } 106 | 107 | /** 108 | * @return the $user 109 | */ 110 | public function _getUser () 111 | { 112 | return $this->user; 113 | } 114 | 115 | /** 116 | * @return the $nick 117 | */ 118 | public function _getNick () 119 | { 120 | return $this->nick; 121 | } 122 | 123 | /** 124 | * 125 | */ 126 | function _getModules () 127 | { 128 | return $this->modules; 129 | } 130 | 131 | /** 132 | * 133 | * @param $modules 134 | */ 135 | function _setModules ($modules) 136 | { 137 | $this->modules = 
$modules; 138 | } 139 | 140 | /** 141 | * 142 | */ 143 | function _getMethodmap () 144 | { 145 | return $this->methodMap; 146 | } 147 | 148 | /** 149 | * 150 | * @param $methodMap 151 | */ 152 | function _setMethodmap ($methodMap) 153 | { 154 | $this->methodMap = $methodMap; 155 | } 156 | 157 | /** 158 | * 159 | */ 160 | public function _getConfig ($name = null) 161 | { 162 | if ( !$name ) 163 | { 164 | return $this->config; 165 | } 166 | return $this->config->_getConfig($name); 167 | } 168 | 169 | /** 170 | * 171 | */ 172 | public function _main () 173 | { 174 | $this->_server(); 175 | while ( !feof($this->socket) ) 176 | { 177 | if ($this->rss_time + 120 < time()) 178 | { 179 | echo "Time to check for feed updates\r\n"; 180 | $rss = new RSSFunctions(); 181 | $rss->_checkForUpdates($this); 182 | $this->rss_time = time(); 183 | } 184 | $this->_getFromServer(); 185 | $this->_parseInput(); 186 | } 187 | } 188 | 189 | /** 190 | * 191 | */ 192 | public function _server () 193 | { 194 | $this->socket = @fsockopen($this->config->_getConfig('server'), $this->config->_getConfig('port')); 195 | if ( !$this->socket ) 196 | { 197 | die("Unable to connect to server\r\n"); 198 | } 199 | fputs($this->socket, "USER " . $this->config->_getConfig('user') . " :" . $this->config->_getConfig('nick') . "\r\n"); 200 | fputs($this->socket, "NICK " . $this->config->_getConfig('nick') . "\r\n"); 201 | } 202 | 203 | /** 204 | * 205 | */ 206 | public function _getFromServer () 207 | { 208 | $this->data = trim( fgets($this->socket) ); 209 | if ( !$this->data ) 210 | { 211 | return; 212 | } 213 | echo "========>\t\t" . $this->data . "\r\n"; 214 | return; 215 | } 216 | 217 | /** 218 | * 219 | */ 220 | public function _parseInput () 221 | { 222 | if ( !$this->data ) 223 | { 224 | return; 225 | } 226 | 227 | $explodedData = explode(" ", $this->data); 228 | if ($this->data[0] == ":") 229 | { 230 | $this->data = substr($this->data, 1); 231 | if (method_exists($this->handle_functions, "_handle_" . 
$explodedData[1])) 232 | { 233 | $return = call_user_func(array($this->handle_functions , "_handle_" . $explodedData[1]), $this); 234 | if (preg_match("/reload (.+) (.+)/", $return, $matches)) 235 | { 236 | $returnDest = $matches[1]; 237 | $filename = $matches[2]; 238 | $reload = new classReloader(); 239 | $this->_sendMsg($returnDest, $reload->reload($this, $filename)); 240 | } 241 | } 242 | } 243 | else 244 | { 245 | if (method_exists($this->handle_functions, "_handle_" . $explodedData[0])) 246 | { 247 | call_user_func(array($this->handle_functions , "_handle_" . $explodedData[0]), $this); 248 | } 249 | } 250 | } 251 | 252 | /** 253 | * 254 | * @param String $destination 255 | * @param String $message 256 | */ 257 | public function _sendMsg ($destination, $message) 258 | { 259 | $this->_putToServer("PRIVMSG $destination :$message"); 260 | sleep(2); 261 | } 262 | 263 | /** 264 | * 265 | * @param String $string 266 | */ 267 | public function _putToServer ($string) 268 | { 269 | echo "<========\t\t$string\r\n"; 270 | fputs($this->socket, "$string\r\n"); 271 | } 272 | 273 | /** 274 | * Send a JOIN command for one channel. 275 | * @param String $chan 276 | */ 277 | public function _joinChan ($chan) 278 | { 279 | $this->_putToServer("JOIN $chan"); /* _putToServer appends the terminating \r\n itself */ 280 | } 281 | 282 | /** 283 | * 284 | */ 285 | public function _joinChans () 286 | { 287 | foreach ($this->config->_getChans() as $channel) 288 | { 289 | $this->_joinChan($channel); 290 | } 291 | 292 | } 293 | 294 | /** 295 | * 296 | */ 297 | public function _getData () 298 | { 299 | return $this->data; 300 | } 301 | 302 | /** 303 | * Send a QUIT command carrying the given message. 304 | * @param unknown_type $message 305 | */ 306 | public function quit ($message) 307 | { 308 | $this->_putToServer("QUIT :$message"); /* the method is named _putToServer; putToServer does not exist on this class */ 309 | } 310 | 311 | } -------------------------------------------------------------------------------- /magpie/rss_cache.inc: -------------------------------------------------------------------------------- 1 | 7 | * Version: 0.51 8 | * License: GPL 9 | * 10 | * The lastest version of MagpieRSS
can be obtained from:
 * http://magpierss.sourceforge.net
 *
 * For questions, help, comments, discussion, etc., please join the
 * Magpie mailing list:
 * http://lists.sourceforge.net/lists/listinfo/magpierss-general
 *
 */

/*=======================================================================*\
    Class:      RSSCache
    Purpose:    file based cache of serialized RSS objects.
                Every entry is stored in BASE_CACHE in a file named after
                the md5 of its key (see file_name()).  Failures are
                reported through $this->ERROR rather than by throwing.
\*=======================================================================*/
class RSSCache {
    var $BASE_CACHE = './cache';    // where the cache files are stored
    var $MAX_AGE    = 3600;         // when are files stale, default one hour
    var $ERROR      = "";           // last error message, reset by set/get/check_cache

    /*=======================================================================*\
        Function:   __construct
        Purpose:    configure the cache and try to create its directory
        Input:      base - cache directory (optional, default './cache')
                    age  - max age of an entry in seconds (optional)
        Output:     nothing; $this->ERROR is set if the directory is missing
                    and could not be created
    \*=======================================================================*/
    function __construct ($base='', $age='') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

    /*=======================================================================*\
        Function:   RSSCache
        Purpose:    PHP 4 style constructor, kept so that old engines (and
                    code calling it explicitly) keep working.  PHP 8 no longer
                    treats a method named after the class as a constructor,
                    which is why the real work lives in __construct().
    \*=======================================================================*/
    function RSSCache ($base='', $age='') {
        $this->__construct( $base, $age );
    }

    /*=======================================================================*\
        Function:   set
        Purpose:    add an item to the cache, keyed on url
        Input:      url from which the rss file was fetched,
                    the object to store
        Output:     the cache file name on success, 0 on failure
    \*=======================================================================*/
    function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        fwrite( $fp, $data );
        fclose( $fp );

        return $cache_file;
    }

    /*=======================================================================*\
        Function:   get
        Purpose:    fetch an item from the cache
        Input:      url from which the rss file was fetched
        Output:     cached object on HIT, 0 on MISS (missing, unreadable or
                    empty cache file)
    \*=======================================================================*/
    function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        $fp = @fopen($cache_file, 'r');
        if ( ! $fp ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        $rss = 0;
        $filesize = filesize( $cache_file );
        if ( $filesize ) {
            $data = fread( $fp, $filesize );
            $rss = $this->unserialize( $data );
        }

        // always release the handle, the original code leaked it
        fclose( $fp );

        return $rss;
    }

    /*=======================================================================*\
        Function:   check_cache
        Purpose:    check a url for membership in the cache
                    and whether the object is older than MAX_AGE (ie. STALE)
        Input:      url from which the rss file was fetched
        Output:     'HIT' if cached and current, 'STALE' if cached but old,
                    'MISS' if not cached
    \*=======================================================================*/
    function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( file_exists( $filename ) ) {
            // find how long ago the file was added to the cache
            // and whether that is longer than MAX_AGE
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            if ( $this->MAX_AGE > $age ) {
                // object exists and is current
                return 'HIT';
            }
            else {
                // object exists but is old
                return 'STALE';
            }
        }
        else {
            // object does not exist
            return 'MISS';
        }
    }

    /*=======================================================================*\
        Function:   cache_age
        Purpose:    report how old a cache entry is
        Input:      cache key (the same value that was passed to set())
        Output:     age in seconds, or -1 if there is no such entry
    \*=======================================================================*/
    function cache_age( $cache_key ) {
        // the original looked up an undefined $url here, so it always
        // inspected the file for the empty key instead of $cache_key
        $filename = $this->file_name( $cache_key );
        if ( file_exists( $filename ) ) {
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            return $age;
        }
        else {
            return -1;
        }
    }

    /*=======================================================================*\
        Function:   serialize
        Purpose:    turn an object into the string stored in the cache file
    \*=======================================================================*/
    function serialize ( $rss ) {
        return serialize( $rss );
    }

    /*=======================================================================*\
        Function:   unserialize
        Purpose:    turn the contents of a cache file back into an object
    \*=======================================================================*/
    function unserialize ( $data ) {
        return unserialize( $data );
    }

    /*=======================================================================*\
        Function:   file_name
        Purpose:    map url to location in cache
        Input:      url from which the rss file was fetched
        Output:     a file name
    \*=======================================================================*/
    function file_name ($url) {
        $filename = md5( $url );
        return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

    /*=======================================================================*\
        Function:   error
        Purpose:    register error in $this->ERROR, then trigger it when
                    MAGPIE_DEBUG is on, otherwise send it to the error log
    \*=======================================================================*/
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;
        // MAGPIE_DEBUG is normally defined by rss_fetch.inc; treat it as off
        // when this class is used without it
        if ( defined('MAGPIE_DEBUG') and MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

    /*=======================================================================*\
        Function:   debug
        Purpose:    report a debug message, only when MAGPIE_DEBUG is on
    \*=======================================================================*/
    function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( defined('MAGPIE_DEBUG') and MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }

}

?>
--------------------------------------------------------------------------------
/magpie/htdocs/cookbook.html:
--------------------------------------------------------------------------------
1 | 2 | 3 | Magie RSS Recipes: Simple PHP RSS How To 4 | 20 | 21 | 22 |

23 |

MagpieRSS Recipes: Cooking with Corbies

24 | 25 |

"Four and twenty blackbirds baked in a 26 | pie."

27 |

28 |

29 |

    30 |
  1. Limit the Number of Headlines(aka Items) Returned
  2. 31 |
  3. Display a Custom Error Message if Something Goes 32 | Wrong
  4. 33 |
  5. Generate a New RSS Feed
  6. 34 |
  7. Display Headlines More Recent than X Date
  8. 35 |
  9. Parse a Local File Containing RSS
  10. 36 | 37 |
38 |

39 | 40 |

1. Limit the Number of Headlines(aka Items) Returned.

41 | 42 |

Problem:

43 | 44 | You want to display the 10 (or 3 or whatever) most recent headlines, but the RSS feed 45 | contains 15. 46 | 47 |

Solution:

48 | 49 |
 50 | $num_items = 10;
 51 | $rss = fetch_rss($url);
 52 | 
 53 | $items = array_slice($rss->items, 0, $num_items);
 54 | 
 55 | foreach ( $items as $item ) {
 56 | 
57 |

Discussion:

58 | 59 | Rather than trying to limit the number of items Magpie parses, a much simpler, 60 | and more flexible approach is to take a "slice" of the array of items. And 61 | array_slice() is smart enough to do the right thing if the feed has fewer items 62 | than $num_items. 63 | 64 |

See:

http://www.php.net/array_slice 65 |

66 | 67 |

2. Display a Custom Error Message if Something Goes Wrong

68 | 69 |

Problem:

70 | 71 | You don't want Magpie's error messages showing up if something goes wrong. 72 | 73 |

Solution:

74 |
 75 | # Magpie throws USER_WARNINGS only
 76 | # so you can cloak these, by only showing ERRORs
 77 | error_reporting(E_ERROR);
 78 | 
 79 | # check the return value of fetch_rss()
 80 | 
 81 | $rss = fetch_rss($url);
 82 | 
 83 | if ( $rss ) {
 84 | ...display rss feed...
 85 | }
 86 | else {
 87 |    echo "An error occurred!  " .
 88 |         "Consider donating more $$$ for restoration of services." .
 89 |         "<br>Error Message: " . magpie_error();
 90 | }
 91 | 
92 |

Discussion:

93 | 94 | MagpieRSS triggers a warning in a number of circumstances. The 2 most common 95 | circumstances are: if the specified RSS file isn't properly formed (usually 96 | because it includes illegal HTML), or if Magpie can't download the remote RSS 97 | file, and there is no cached version. 98 | 99 | If you don't want your users to see these warnings change your error_reporting 100 | settings to only display ERRORs.
101 | Another option is to turn off display_errors, 102 | so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. 103 | 104 | You can do this with: 105 | 106 |
107 | # you can also do this in your php.ini file
108 | ini_set('display_errors', 0);
109 | 
110 | 111 |

See:

112 | http://www.php.net/error_reporting,
115 | http://www.php.net/ini_set,
116 | http://www.php.net/manual/en/ref.errorfunc.php
119 | 120 |

3. Generate a New RSS Feed

121 | 122 |

Problem:

123 | 124 | Create an RSS feed for other people to use. 125 | 126 |

Solution:

127 | 128 | Use Useful Inc's RSSWriter. 129 | 130 |

Discussion:

131 | 132 | An example of turning a Magpie parsed RSS object back into an RSS file is 133 | forthcoming. In the meantime RSSWriter is well documented. 134 | 135 |

4. Display Headlines More Recent than X Date

136 | 137 |

Problem:

138 | 139 | You only want to display headlines that were published on, or after a certain 140 | date. 141 | 142 | 143 |

Solution:

144 |
145 | require_once('rss_utils.inc');
146 | 
147 | # get all headlines published today
148 | $today = getdate();
149 | 
150 | # today, 12AM
151 | $date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']);
152 | 
153 | $rss = fetch_rss($url);
154 | 
155 | foreach ( $rss->items as $item ) {
156 |    $published = parse_w3cdtf($item['dc']['date']);
157 |    if ( $published >= $date ) {
158 |         echo "Title: " . $item['title'];
159 |         echo "Published: " . date("h:i:s A", $published);
160 |         echo "<p>";
161 |     }
162 | }
163 | 
164 |

Discussion:

165 | 166 | This recipe only works for RSS 1.0 feeds that include the <dc:date> field. 167 | (which is very good RSS style)
168 | parse_w3cdtf() is defined in 169 | rss_utils.inc, and parses RSS style dates into Unix epoch 170 | seconds. 171 | 172 |

See:

173 | http://www.php.net/manual/en/ref.datetime.php 175 | 176 | 177 |

5. Parse a Local File Containing RSS

178 |

Problem:

179 | MagpieRSS provides fetch_rss() which takes a URL and returns a 180 | parsed RSS object, but what if you want to parse a file stored locally that 181 | doesn't have a URL? 182 | 183 |

Solution

184 |
185 | require_once('rss_parse.inc');
186 | 
187 | $rss_file = 'some_rss_file.rdf';
188 | $rss_string = read_file($rss_file);
189 | $rss = new MagpieRSS( $rss_string );
190 | 
191 | if ( $rss and !$rss->ERROR) {
192 | ...display rss...
193 | }
194 | else {
195 |     echo "Error: " . $rss->ERROR;
196 | }
197 | 
198 | # efficiently read a file into a string
199 | # in php >= 4.3.0 you can simply use file_get_contents()
200 | #
201 | function read_file($filename) {
202 |     $fh = fopen($filename, 'r') or die($php_errormsg);
203 |     $rss_string = fread($fh, filesize($filename) );
204 |     fclose($fh);
205 |     return $rss_string;
206 | }
207 | 
208 | 209 |

Discussion

210 | Here we are using MagpieRSS's RSS parser directly without the convenience wrapper 211 | of fetch_rss(). We read the contents of the RSS file into a 212 | string, and pass it to the parser constructor. Notice also that error handling 213 | is subtly different. 214 | 215 |

See:

216 | http://www.php.net/manual/en/ref.filesystem.php,
219 | http://www.php.net/manual/en/language.oop.php 221 | 222 | 235 | 236 | 237 | 238 | -------------------------------------------------------------------------------- /magpie/ChangeLog: -------------------------------------------------------------------------------- 1 | 2005-10-28 14:11 kellan 2 | 3 | * extlib/Snoopy.class.inc: a better solution 4 | 5 | 2005-10-28 11:51 kellan 6 | 7 | * extlib/Snoopy.class.inc: fix arbtriary code execution 8 | vulnerability when using curl+ssl 9 | 10 | http://www.sec-consult.com/216.html 11 | 12 | 2005-03-08 10:46 kellan 13 | 14 | * rss_parse.inc: fix bug w/ atom and date normalization 15 | 16 | 2005-02-09 14:59 kellan 17 | 18 | * rss_fetch.inc: fix stale cache bug 19 | 20 | 2005-01-28 02:27 kellan 21 | 22 | * rss_parse.inc: support php w/o array_change_case 23 | 24 | 2005-01-23 20:02 kellan 25 | 26 | * rss_fetch.inc: fix cache bug introduced by charset encoding 27 | 28 | 2005-01-12 09:14 kellan 29 | 30 | * rss_cache.inc, rss_fetch.inc: more sanity checks for when things 31 | go wrong 32 | 33 | 2004-12-12 13:44 kellan 34 | 35 | * INSTALL, rss_cache.inc, rss_utils.inc: detab 36 | 37 | 2004-11-23 20:15 kellan 38 | 39 | * rss_parse.inc: fix calling iconv instead of mb_convert_encoding 40 | 41 | 2004-11-22 02:11 kellan 42 | 43 | * CHANGES, ChangeLog, rss_parse.inc, scripts/magpie_debug.php: last 44 | bit of tidying 45 | 46 | 2004-11-22 01:45 kellan 47 | 48 | * rss_fetch.inc: detab, bump version 49 | 50 | 2004-11-22 01:43 kellan 51 | 52 | * rss_parse.inc: was filtering too much 53 | 54 | 2004-11-22 00:03 kellan 55 | 56 | * rss_fetch.inc, rss_parse.inc: cache on $url . 
$output_encoding 57 | otherwise we can get munged output 58 | 59 | 2004-11-21 23:52 kellan 60 | 61 | * rss_parse.inc: add WARNING 62 | 63 | 2004-11-21 23:45 kellan 64 | 65 | * rss_parse.inc: don't set ERROR on notice or warning (rss_fetch 66 | dies on parse errors) 67 | 68 | 2004-11-21 23:44 kellan 69 | 70 | * rss_fetch.inc: add encoding defines (fix timeout error reporting) 71 | 72 | 2004-11-21 20:21 kellan 73 | 74 | * rss_parse.inc: incorporate steve's patch 75 | 76 | 2004-11-21 19:26 kellan 77 | 78 | * rss_parse.inc: remove old debugging functions, totally 79 | arbitrarily. might break stuff. can't really explain why i'm 80 | doing this. 81 | 82 | 2004-10-28 15:52 kellan 83 | 84 | * rss_parse.inc: fixed '=' instead of '==' 85 | 86 | 2004-10-26 00:48 kellan 87 | 88 | * rss_parse.inc: chance epoch to timestamp to conform w/ php naming 89 | conventions 90 | 91 | 2004-06-15 12:00 kellan 92 | 93 | * rss_parse.inc: [no log message] 94 | 95 | 2004-04-26 14:16 kellan 96 | 97 | * rss_fetch.inc: bump version 98 | 99 | 2004-04-26 12:36 kellan 100 | 101 | * rss_parse.inc: fix field doubling 102 | 103 | 2004-04-24 17:47 kellan 104 | 105 | * CHANGES, ChangeLog: updated 106 | 107 | 2004-04-24 17:35 kellan 108 | 109 | * rss_fetch.inc: bumped version 110 | 111 | 2004-04-24 16:52 kellan 112 | 113 | * rss_parse.inc: support arbitrary atom content constructs 114 | 115 | some refactoring 116 | 117 | 2004-04-24 16:15 kellan 118 | 119 | * rss_parse.inc: support summary content contstruct. 
add normalize 120 | function 121 | 122 | 2004-03-27 16:29 kellan 123 | 124 | * extlib/Snoopy.class.inc: accept self-signed certs 125 | 126 | 2004-03-27 12:53 kellan 127 | 128 | * extlib/Snoopy.class.inc: fixed SSL support * set status * set 129 | error on bad curl 130 | 131 | (also ripped out big chunks of dead weight (submit_form) which 132 | were getting in my way 133 | 134 | 2004-01-25 02:25 kellan 135 | 136 | * rss_parse.inc: make RSS 1.0's rdf:about available 137 | 138 | 2004-01-25 02:07 kellan 139 | 140 | * rss_parse.inc: clean up text, and line formats. add support item 141 | rdf:about 142 | 143 | 2004-01-24 23:40 kellan 144 | 145 | * CHANGES, ChangeLog: update changes 146 | 147 | 2004-01-24 23:37 kellan 148 | 149 | * rss_fetch.inc: updated version 150 | 151 | 2004-01-24 23:35 kellan 152 | 153 | * rss_parse.inc: whitespace 154 | 155 | 2004-01-24 23:23 kellan 156 | 157 | * extlib/Snoopy.class.inc: support badly formatted http headers 158 | 159 | 2004-01-24 23:20 kellan 160 | 161 | * rss_parse.inc: added alpha atom parsing support 162 | 163 | 2003-06-25 22:34 kellan 164 | 165 | * extlib/Snoopy.class.inc: fixed fread 4.3.2 compatibility problems 166 | 167 | 2003-06-13 11:31 kellan 168 | 169 | * rss_fetch.inc: reset cache on 304 170 | 171 | 2003-06-12 21:37 kellan 172 | 173 | * rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: 174 | bumped up version numbers 175 | 176 | 2003-06-12 21:32 kellan 177 | 178 | * htdocs/index.html: updated news 179 | 180 | 2003-06-12 21:27 kellan 181 | 182 | * NEWS: a manual blog :) 183 | 184 | 2003-06-12 21:22 kellan 185 | 186 | * htdocs/index.html: fully qualified img 187 | 188 | 2003-06-12 21:20 kellan 189 | 190 | * htdocs/index.html: clean up. added badge. 
191 | 192 | 2003-06-12 21:04 kellan 193 | 194 | * rss_utils.inc: clean up regex 195 | 196 | 2003-06-12 21:02 kellan 197 | 198 | * rss_cache.inc: suppress some warnings 199 | 200 | 2003-05-30 20:44 kellan 201 | 202 | * extlib/Snoopy.class.inc: more comments, cleaned up notice 203 | 204 | 2003-05-30 15:14 kellan 205 | 206 | * extlib/Snoopy.class.inc: don't advertise gzip support if the user 207 | hasn't built php with gzinflate support 208 | 209 | 2003-05-12 22:32 kellan 210 | 211 | * ChangeLog: changes 212 | 213 | 2003-05-12 22:11 kellan 214 | 215 | * htdocs/index.html: announce 0.5 216 | 217 | 2003-05-12 21:42 kellan 218 | 219 | * htdocs/index.html: change 220 | 221 | 2003-05-12 21:39 kellan 222 | 223 | * rss_fetch.inc: use gzip 224 | 225 | 2003-05-12 21:37 kellan 226 | 227 | * extlib/Snoopy.class.inc: added support gzip encoded content 228 | negoiation 229 | 230 | 2003-05-12 21:32 kellan 231 | 232 | * rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: fixed 233 | typoes 234 | 235 | 2003-04-26 21:44 kellan 236 | 237 | * rss_parse.inc: fix minor typo 238 | 239 | 2003-04-18 08:19 kellan 240 | 241 | * htdocs/cookbook.html: updated cookbook to show more code for 242 | limiting items 243 | 244 | 2003-03-03 16:02 kellan 245 | 246 | * rss_parse.inc, scripts/magpie_slashbox.php: committed (or 247 | adpated) patch from Nicola (www.technick.com) to quell 'Undefined 248 | Indexes' notices 249 | 250 | 2003-03-03 15:59 kellan 251 | 252 | * rss_fetch.inc: commited patch from nicola (www.technick.com) to 253 | quell 'undefined indexes' notices. 254 | 255 | * Magpie now automatically includes its version in the 256 | user-agent, & whether cacheing is turned on. 
257 | 258 | 2003-02-12 01:22 kellan 259 | 260 | * CHANGES, ChangeLog: ChangeLog now auto-generated by cvs2cl 261 | 262 | 2003-02-12 00:21 kellan 263 | 264 | * rss_fetch.inc: better errors, hopefully stomped on pesky notices 265 | 266 | 2003-02-12 00:19 kellan 267 | 268 | * rss_parse.inc: check to see is xml is supported, if not die 269 | 270 | also throw better xml errors 271 | 272 | 2003-02-12 00:18 kellan 273 | 274 | * rss_cache.inc: hopefully cleared up some notices that were being 275 | thrown into the log 276 | 277 | fixed a debug statement that was being called as an error 278 | 279 | 2003-02-12 00:15 kellan 280 | 281 | * scripts/: magpie_simple.php, magpie_slashbox.php: moved 282 | magpie_simple to magpie_slashbox, and replaced it with a simpler 283 | demo. 284 | 285 | 2003-02-12 00:02 kellan 286 | 287 | * INSTALL, README, TROUBLESHOOTING: Improved documentation. Better 288 | install instructions. 289 | 290 | TROUBLESHOOTING cover common installation and usage problems 291 | 292 | 2003-01-22 14:40 kellan 293 | 294 | * htdocs/cookbook.html: added cookbook.html 295 | 296 | 2003-01-21 23:47 kellan 297 | 298 | * cookbook: a magpie cookbook 299 | 300 | 2003-01-20 10:09 kellan 301 | 302 | * ChangeLog: updated 303 | 304 | 2003-01-20 09:23 kellan 305 | 306 | * scripts/simple_smarty.php: minor clean up 307 | 308 | 2003-01-20 09:15 kellan 309 | 310 | * scripts/README: added smarty url 311 | 312 | 2003-01-20 09:14 kellan 313 | 314 | * magpie_simple.php, htdocs/index.html, scripts/README, 315 | scripts/magpie_debug.php, scripts/magpie_simple.php, 316 | scripts/simple_smarty.php, 317 | scripts/smarty_plugin/modifier.rss_date_parse.php, 318 | scripts/templates/simple.smarty: Added scripts directory for 319 | examples on how to use MagpieRSS 320 | 321 | magpie_simple - is a simple example magpie_debug - spew all the 322 | information from a parsed RSS feed simple_smary - example of 323 | using magpie with Smarty template system 324 | 
smarty_plugin/modifier.rss_date_parse.php - support file for the 325 | smarty demo templates/simple.smary - template for the smarty demo 326 | 327 | 2003-01-20 09:11 kellan 328 | 329 | * rss_fetch.inc, rss_parse.inc: changes to error handling to give 330 | script authors more access to magpie's errors. 331 | 332 | added method magpie_error() to retrieve global MAGPIE_ERROR 333 | variable for when fetch_rss() returns false 334 | 335 | 2002-10-26 19:02 kellan 336 | 337 | * htdocs/index.html: putting the website under source control 338 | 339 | 2002-10-26 18:43 kellan 340 | 341 | * AUTHORS, ChangeLog, INSTALL, README: some documentation to make 342 | it all look official :) 343 | 344 | 2002-10-25 23:04 kellan 345 | 346 | * magpie_simple.php: quxx 347 | 348 | 2002-10-25 23:04 kellan 349 | 350 | * rss_parse.inc: added support for textinput and image 351 | 352 | 2002-10-25 19:23 kellan 353 | 354 | * magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc, 355 | rss_utils.inc: switched to using Snoopy for fetching remote RSS 356 | files. 357 | 358 | added support for conditional gets 359 | 360 | 2002-10-25 19:22 kellan 361 | 362 | * rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: 363 | Change comment style to slavishly imitate the phpinsider style 364 | found in Smarty and Snoopy :) 365 | 366 | 2002-10-25 19:18 kellan 367 | 368 | * extlib/Snoopy.class.inc: added Snoopy in order to support 369 | conditional gets 370 | 371 | 2002-10-23 23:19 kellan 372 | 373 | * magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc: 374 | MAJOR CLEANUP! 375 | 376 | * rss_fetch got rid of the options array, replaced it with a more 377 | PHP-like solution of using defines. constants are setup, with 378 | defaults, in the function init() 379 | 380 | got rid of the idiom of passing back an array, its was awkward to 381 | deal with in PHP, and unusual (and consquently confusing to 382 | people). 
now i return true/false values, and try to setup error 383 | string where appropiate (rss_cache has the most complete example 384 | of this) 385 | 386 | change the logic for interacting with the cache 387 | 388 | * rss_cache major re-working of how error are handled. tried to 389 | make the code more resillient. the cache is now much more aware 390 | of MAX_AGE, where before this was being driven out of rss_fetch 391 | (which was silly) 392 | 393 | * rss_parse properly handles xml parse errors. used to sail 394 | along blithely unaware. 395 | 396 | 2002-09-11 11:11 kellan 397 | 398 | * rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, 399 | rss_utils.inc: Initial revision 400 | 401 | 2002-09-11 11:11 kellan 402 | 403 | * rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, 404 | rss_utils.inc: initial import 405 | 406 | -------------------------------------------------------------------------------- /magpie/htdocs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Magpie RSS - PHP RSS Parser 4 | 6 | 36 | 37 | 38 | 39 |

MagpieRSS

40 |

41 |

MagpieRSS provides an XML-based (expat) RSS parser in PHP.

42 |

43 | MagpieRSS is compatible with RSS .9 through RSS 1.0, and supports the 44 | RSS 1.0's modules. (with a few exceptions) 45 |

46 |

66 | 67 |

News!

68 | 194 |

195 |

196 | 197 |

Why?

198 | I wrote MagpieRSS out of a frustration with the limitations of existing 199 | solutions. In particular many of the existing PHP solutions seemed to: 200 | 208 | In particular I failed to find any PHP RSS parsers that could sufficiently 209 | parse RSS 1.0 feeds, to be useful on the RSS based event feeds we generate 210 | at Protest.net. 211 |

212 |

213 | 214 |

Features

215 | 216 | 267 | 268 | 269 |

270 |

271 | 272 |

Magpie's approach to parsing RSS

273 | 274 | Magpie takes a naive, and inclusive approach. Absolutely 275 | non-validating, as long as the RSS feed is well formed, Magpie will 276 | cheerfully parse new, and never before seen tags in your RSS feeds. 277 |

278 |

279 | This makes it very simple to support the varied versions of RSS, but 280 | forces the consumer of an RSS feed to be cognizant of how it is 281 | structured (at least if you want to do something fancy). 282 |

283 |

284 | Magpie parses a RSS feed into a simple object, with 4 fields: 285 | channel, items, image, and 286 | textinput. 287 |

288 |

289 |

channel

290 | $rss->channel contains key-value pairs of all tags, without 291 | nested tags, found between the root tag (<rdf:RDF>, or <rss>) 292 | and the end of the document. 293 |

294 |

295 |

items

296 | $rss->items is an array of associative arrays, each one 297 | describing a single item. An example that looks like: 298 |
299 | <item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257">
300 | <title>Weekly Peace Vigil</title>
301 | <link>http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257</link>
302 | <description>Wear a white ribbon</description>
303 | <dc:subject>Peace</dc:subject>
304 | <ev:startdate>2002-06-01T11:00:00</ev:startdate>
305 | <ev:location>Northampton, MA</ev:location>
306 | <ev:enddate>2002-06-01T12:00:00</ev:enddate>
307 | <ev:type>Protest</ev:type>
308 | </item>
309 | 	

310 | Is parsed, and pushed on the $rss->items array as: 311 |

312 | array(
313 | 	title => 'Weekly Peace Vigil',
314 | 	link => 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257',
315 | 	description => 'Wear a white ribbon',
316 | 	dc => array (
317 | 			subject => 'Peace'
318 | 		),
319 | 	ev => array (
320 | 		startdate => '2002-06-01T11:00:00',
321 | 		enddate => '2002-06-01T12:00:00',
322 | 		type => 'Protest',
323 | 		location => 'Northampton, MA'
324 | 	)
325 | );
326 | 
327 |

328 |

329 |

image and textinput

330 | $rss->image and $rss->textinput are associative arrays 331 | including name-value pairs for anything found between the respective parent 332 | tags. 333 |

334 |

335 | 336 |

Usage Examples:

337 | 338 | A very simple example would be: 339 |
340 | require_once 'rss_fetch.inc';
341 | 
342 | $url = 'http://magpie.sf.net/samples/imc.1-0.rdf';
343 | $rss = fetch_rss($url);
344 | 
345 | echo "Site: ", $rss->channel['title'], "<br>\n";
346 | foreach ($rss->items as $item ) {
347 | 	$title = $item[title];
348 | 	$url   = $item[link];
349 | 	echo "<a href=$url>$title</a></li><br>\n";
350 | }
351 | 
352 | More soon....in the meantime you can check out a 353 | cool tool built with 354 | MagpieRSS, version 0.1. 355 |

356 |

357 | 358 |

Todos

359 |

RSS Parser

360 | 368 | 369 |

RSS Cache

370 | 373 | 374 |

Fetch RSS

375 | 382 |

Misc

383 | 390 | 391 |

392 |

393 |

RSS Resources

394 | . 408 |

409 |

License and Contact Info

410 | Magpie is distributed under the GPL license... 411 |

412 | coded by: kellan (at) protest.net, feedback is always appreciated. 413 |

414 | SourceForge.net Logo 417 | 418 | 419 | 420 |
--------------------------------------------------------------------------------
/magpie/rss_fetch.inc:
--------------------------------------------------------------------------------
 * License:     GPL
 *
 * The latest version of MagpieRSS can be obtained from:
 * http://magpierss.sourceforge.net
 *
 * For questions, help, comments, discussion, etc., please join the
 * Magpie mailing list:
 * magpierss-general@lists.sourceforge.net
 *
 */

// Setup MAGPIE_DIR for use on hosts that don't include
// the current path in include_path.
// with thanks to rajiv and smarty
if (!defined('DIR_SEP')) {
    define('DIR_SEP', DIRECTORY_SEPARATOR);
}

if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');


/*
 * CONSTANTS - redefine these in your script to change the
 * behaviour of fetch_rss() currently, most options affect the cache
 *
 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
 * For me a built in cache was essential to creating a "PHP-like"
 * feel to Magpie, see rss_cache.inc for rationale
 *
 *
 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
 * This should be a location that the webserver can write to.   If this
 * directory does not already exist Magpie will try to be smart and create
 * it.  This will often fail for permissions reasons.
 *
 *
 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
 *
 *
 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
 * instead of returning stale object?
 *
 * MAGPIE_DEBUG - Display debugging notices?
 *
 */


/*=======================================================================*\
    Function:   fetch_rss:
    Purpose:    return RSS object for the given url
                maintain the cache
    Input:      url of RSS file
    Output:     parsed RSS object (see rss_parse.inc), or false on failure
                (the reason is then available from magpie_error())

    NOTES ON CACHEING:
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.

    NOTES ON RETRIEVING REMOTE FILES:
    If a stale cached object carries both an ETag and a Last-Modified value,
    fetch_rss sends them as a conditional get, and on receiving a 304 it
    returns the cached object and re-writes it to the cache.

    NOTES ON FAILED REQUESTS:
    If there is an HTTP error while fetching an RSS object, the cached
    version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY
    is off)
\*=======================================================================*/

define('MAGPIE_VERSION', '0.72');

$MAGPIE_ERROR = "";

function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        else {
            error("Failed to fetch $url and cache is off");
            return false;
        }
    }
    // else cache is ON
    else {
        // Flow
        // 1. check cache
        // 2. if there is a hit, make sure its fresh
        // 3. if cached obj fails freshness check, fetch remote
        // 4. if remote fails, return stale object, or error

        $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

        if (MAGPIE_DEBUG and $cache->ERROR) {
            debug($cache->ERROR, E_USER_WARNING);
        }


        $cache_status    = 0;       // response of check_cache
        $request_headers = array(); // HTTP headers to send with fetch
        $rss             = 0;       // parsed RSS object
        $errormsg        = 0;       // errors, if any

        // store parsed XML by desired output encoding
        // as character munging happens at parse time
        $cache_key       = $url . MAGPIE_OUTPUT_ENCODING;

        if (!$cache->ERROR) {
            // return cache HIT, MISS, or STALE
            $cache_status = $cache->check_cache( $cache_key);
        }

        // if object cached, and cache is fresh, return cached obj
        // (strict comparison: before PHP 8 the initial 0 compared equal
        // to any non-numeric string, so == matched both 'HIT' and 'STALE')
        if ( $cache_status === 'HIT' ) {
            $rss = $cache->get( $cache_key );
            if ( isset($rss) and $rss ) {
                // should be cache age
                $rss->from_cache = 1;
                if ( MAGPIE_DEBUG > 1) {
                    debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
                }
                return $rss;
            }
        }

        // else attempt a conditional get

        // setup headers
        if ( $cache_status === 'STALE' ) {
            $rss = $cache->get( $cache_key );
            // !empty() instead of a bare property read, so objects cached
            // without these fields do not raise notices
            if ( $rss and !empty($rss->etag) and !empty($rss->last_modified) ) {
                $request_headers['If-None-Match'] = $rss->etag;
                // the validator header is If-Modified-Since; the original
                // sent 'If-Last-Modified', which is not an HTTP header
                $request_headers['If-Modified-Since'] = $rss->last_modified;
            }
        }

        $resp = _fetch_remote_file( $url, $request_headers );

        if (isset($resp) and $resp) {
            if ($resp->status == '304' ) {
                // we have the most current copy
                if ( MAGPIE_DEBUG > 1) {
                    debug("Got 304 for $url");
                }
                // reset cache on 304 (at minutillo insistent prodding)
                $cache->set($cache_key, $rss);
                return $rss;
            }
            elseif ( is_success( $resp->status ) ) {
                $rss = _response_to_rss( $resp );
                if ( $rss ) {
                    if (MAGPIE_DEBUG > 1) {
                        debug("Fetch successful");
                    }
                    // add object to cache
                    $cache->set( $cache_key, $rss );
                    return $rss;
                }
            }
            else {
                $errormsg = "Failed to fetch $url ";
                if ( $resp->status == '-100' ) {
                    $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
                }
                elseif ( $resp->error ) {
                    # compensate for Snoopy's annoying habit of tacking
                    # on '\n'
                    $http_error = substr($resp->error, 0, -2);
                    $errormsg .= "(HTTP Error: $http_error)";
                }
                else {
                    $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
                }
            }
        }
        else {
            $errormsg = "Unable to retrieve RSS file for unknown reasons.";
        }

        // else fetch failed

        // attempt to return cached object, unless the caller asked for
        // fresh objects only (the constant was defined but never checked)
        if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
            if ( MAGPIE_DEBUG ) {
                debug("Returning STALE object for $url");
            }
            return $rss;
        }

        // else we totally failed
        error( $errormsg );

        return false;

    } // end if ( !MAGPIE_CACHE_ON ) {
} // end fetch_rss()

/*=======================================================================*\
    Function:   error
    Purpose:    set MAGPIE_ERROR, and trigger error
    Input:      error message (nothing happens if it is empty),
                error level (optional, default E_USER_WARNING)
\*=======================================================================*/

function error ($errormsg, $lvl=E_USER_WARNING) {
        global $MAGPIE_ERROR;

        // append PHP's error message if track_errors enabled
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        if ( $errormsg ) {
            $errormsg = "MagpieRSS: $errormsg";
            $MAGPIE_ERROR = $errormsg;
            trigger_error( $errormsg, $lvl);
        }
}

/*=======================================================================*\
    Function:   debug
    Purpose:    trigger a "MagpieRSS [debug]" message at the given level
\*=======================================================================*/
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    trigger_error("MagpieRSS [debug] $debugmsg", $lvl);
}

/*=======================================================================*\
    Function:   magpie_error
    Purpose:    accessor for the magpie error variable
    Input:      new error message (optional); when given and non-empty it
                replaces the stored one
    Output:     the current value of MAGPIE_ERROR
\*=======================================================================*/
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    if ( isset($errormsg) and $errormsg ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}

/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file
    Input:      url of the remote file
                headers to send along with the request (optional)
    Output:     an HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP
    $client = new Snoopy();
    $client->agent = MAGPIE_USER_AGENT;
    $client->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $client->use_gzip = MAGPIE_USE_GZIP;
    if (is_array($headers) ) {
        $client->rawheaders = $headers;
    }

    @$client->fetch($url);
    return $client;

}

/*=======================================================================*\
    Function:   _response_to_rss
    Purpose:    parse an HTTP response object into an RSS object
    Input:      an HTTP response object (see Snoopy)
    Output:     parsed RSS object (see rss_parse), or false if parsing failed
\*=======================================================================*/
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // if RSS parsed successfully
    if ( $rss and !$rss->ERROR) {

        // find Etag, and Last-Modified
        foreach($resp->headers as $h) {
            // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
            if (strpos($h, ": ")) {
                list($field, $val) = explode(": ", $h, 2);
            }
            else {
                $field = $h;
                $val = "";
            }

            // header field names are case-insensitive, so a server sending
            // "Etag" or "last-modified" must be recognised as well
            if ( strcasecmp($field, 'ETag') == 0 ) {
                $rss->etag = $val;
            }

            if ( strcasecmp($field, 'Last-Modified') == 0 ) {
                $rss->last_modified = $val;
            }
        }

        return $rss;
    } // else construct error message
    else {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);

        return false;
    } // end if ($rss and !$rss->error)
}

/*=======================================================================*\
    Function:   init
    Purpose:    setup constants with default values
                check for user overrides
                (runs once; later calls return immediately)
\*=======================================================================*/
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    else {
        define('MAGPIE_INITALIZED', true);
    }

    if ( !defined('MAGPIE_CACHE_ON') ) {
        define('MAGPIE_CACHE_ON', true);
    }

    if ( !defined('MAGPIE_CACHE_DIR') ) {
        define('MAGPIE_CACHE_DIR', './cache');
    }

    if ( !defined('MAGPIE_CACHE_AGE') ) {
        define('MAGPIE_CACHE_AGE', 60*60); // one hour
    }

    if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) {
        define('MAGPIE_CACHE_FRESH_ONLY', false);
    }

    if ( !defined('MAGPIE_OUTPUT_ENCODING') ) {
        define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
    }

    if ( !defined('MAGPIE_INPUT_ENCODING') ) {
        define('MAGPIE_INPUT_ENCODING', null);
    }

    if ( !defined('MAGPIE_DETECT_ENCODING') ) {
        define('MAGPIE_DETECT_ENCODING', true);
    }

    if ( !defined('MAGPIE_DEBUG') ) {
        define('MAGPIE_DEBUG', 0);
    }

    if ( !defined('MAGPIE_USER_AGENT') ) {
        $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net';

        // advertise whether cacheing is turned on
        if ( MAGPIE_CACHE_ON ) {
            $ua = $ua . ')';
        }
        else {
            $ua = $ua . '; No cache)';
        }

        define('MAGPIE_USER_AGENT', $ua);
    }

    if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
        define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout
    }

    // use gzip encoding to fetch rss files if supported?
    if ( !defined('MAGPIE_USE_GZIP') ) {
        define('MAGPIE_USE_GZIP', true);
    }
}

// NOTE: the following code should really be in Snoopy, or at least
// somewhere other than rss_fetch!

/*=======================================================================*\
    HTTP STATUS CODE PREDICATES
    These functions attempt to classify an HTTP status code
    based on RFC 2616 and RFC 2518.

    All of them take an HTTP status code as input, and return true or false

    All this code is adapted from LWP's HTTP::Status.
\*=======================================================================*/


/*=======================================================================*\
    Function:   is_info
    Purpose:    return true if Informational status code
\*=======================================================================*/
function is_info ($sc) {
    return $sc >= 100 && $sc < 200;
}

/*=======================================================================*\
    Function:   is_success
    Purpose:    return true if Successful status code
\*=======================================================================*/
function is_success ($sc) {
    return $sc >= 200 && $sc < 300;
}

/*=======================================================================*\
    Function:   is_redirect
    Purpose:    return true if Redirection status code
\*=======================================================================*/
function is_redirect ($sc) {
    return $sc >= 300 && $sc < 400;
}

/*=======================================================================*\ 435 | Function: is_error 436 | Purpose: return true if Error status code 437 | \*=======================================================================*/ 438 | function is_error ($sc) { 439 | return $sc >= 400 && $sc < 600; 440 | } 441 | 442 | /*=======================================================================*\ 443 | Function: is_client_error 444 | Purpose: return true if Error status code, and it's a client error 445 | \*=======================================================================*/ 446 | function is_client_error ($sc) { 447 | return $sc >= 400 && $sc < 500; 448 | } 449 | 450 | /*=======================================================================*\ 451 | Function: is_server_error 452 | Purpose: return true if Error status code, and it's a server error 453 | \*=======================================================================*/ 454 | function is_server_error ($sc) { 455 | return $sc >= 500 && $sc < 600; 456 | } 457 | 458 | ?> 459 | -------------------------------------------------------------------------------- /magpie/rss_parse.inc: -------------------------------------------------------------------------------- 1 | 18 | * @version 0.7a 19 | * @license GPL 20 | * 21 | */ 22 | 23 | define('RSS', 'RSS'); 24 | define('ATOM', 'Atom'); 25 | 26 | require_once (MAGPIE_DIR . 'rss_utils.inc'); 27 | 28 | /** 29 | * Hybrid parser, and object, takes RSS as a string and returns a simple object. 
30 | * 31 | * see: rss_fetch.inc for a simpler interface with integrated caching support 32 | * 33 | */ 34 | class MagpieRSS { 35 | var $parser; 36 | 37 | var $current_item = array(); // item currently being parsed 38 | var $items = array(); // collection of parsed items 39 | var $channel = array(); // hash of channel fields 40 | var $textinput = array(); 41 | var $image = array(); 42 | var $feed_type; 43 | var $feed_version; 44 | var $encoding = ''; // output encoding of parsed rss 45 | 46 | var $_source_encoding = ''; // only set if we have to parse xml prolog 47 | 48 | var $ERROR = ""; 49 | var $WARNING = ""; 50 | 51 | // define some constants 52 | 53 | var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); 54 | var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); 55 | 56 | // parser variables, useless if you're not a parser, treat as private 57 | var $stack = array(); // parser stack 58 | var $inchannel = false; 59 | var $initem = false; 60 | var $incontent = false; // if in Atom field 61 | var $intextinput = false; 62 | var $inimage = false; 63 | var $current_namespace = false; 64 | 65 | 66 | /** 67 | * Set up XML parser, parse source, and return populated RSS object.. 68 | * 69 | * @param string $source string containing the RSS to be parsed 70 | * 71 | * NOTE: Probably a good idea to leave the encoding options alone unless 72 | * you know what you're doing as PHP's character set support is 73 | * a little weird. 74 | * 75 | * NOTE: A lot of this is unnecessary but harmless with PHP5 76 | * 77 | * 78 | * @param string $output_encoding output the parsed RSS in this character 79 | * set defaults to ISO-8859-1 as this is PHP's 80 | * default. 81 | * 82 | * NOTE: might be changed to UTF-8 in future 83 | * versions. 84 | * 85 | * @param string $input_encoding the character set of the incoming RSS source. 86 | * Leave blank and Magpie will try to figure it 87 | * out. 
88 | * 89 | * 90 | * @param bool $detect_encoding if false Magpie won't attempt to detect 91 | * source encoding. (caveat emptor) 92 | * 93 | */ 94 | function MagpieRSS ($source, $output_encoding='ISO-8859-1', 95 | $input_encoding=null, $detect_encoding=true) 96 | { 97 | # if PHP xml isn't compiled in, die 98 | # 99 | if (!function_exists('xml_parser_create')) { 100 | $this->error( "Failed to load PHP's XML Extension. " . 101 | "http://www.php.net/manual/en/ref.xml.php", 102 | E_USER_ERROR ); 103 | } 104 | 105 | list($parser, $source) = $this->create_parser($source, 106 | $output_encoding, $input_encoding, $detect_encoding); 107 | 108 | 109 | if (!is_resource($parser)) { 110 | $this->error( "Failed to create an instance of PHP's XML parser. " . 111 | "http://www.php.net/manual/en/ref.xml.php", 112 | E_USER_ERROR ); 113 | } 114 | 115 | 116 | $this->parser = $parser; 117 | 118 | # pass in parser, and a reference to this object 119 | # setup handlers 120 | # 121 | xml_set_object( $this->parser, $this ); 122 | xml_set_element_handler($this->parser, 123 | 'feed_start_element', 'feed_end_element' ); 124 | 125 | xml_set_character_data_handler( $this->parser, 'feed_cdata' ); 126 | 127 | $status = xml_parse( $this->parser, $source ); 128 | 129 | if (! 
$status ) { 130 | $errorcode = xml_get_error_code( $this->parser ); 131 | if ( $errorcode != XML_ERROR_NONE ) { 132 | $xml_error = xml_error_string( $errorcode ); 133 | $error_line = xml_get_current_line_number($this->parser); 134 | $error_col = xml_get_current_column_number($this->parser); 135 | $errormsg = "$xml_error at line $error_line, column $error_col"; 136 | 137 | $this->error( $errormsg ); 138 | } 139 | } 140 | 141 | xml_parser_free( $this->parser ); 142 | 143 | $this->normalize(); 144 | } 145 | 146 | function feed_start_element($p, $element, &$attrs) { 147 | $el = $element = strtolower($element); 148 | $attrs = array_change_key_case($attrs, CASE_LOWER); 149 | 150 | // check for a namespace, and split if found 151 | $ns = false; 152 | if ( strpos( $element, ':' ) ) { 153 | list($ns, $el) = split( ':', $element, 2); 154 | } 155 | if ( $ns and $ns != 'rdf' ) { 156 | $this->current_namespace = $ns; 157 | } 158 | 159 | # if feed type isn't set, then this is first element of feed 160 | # identify feed from root element 161 | # 162 | if (!isset($this->feed_type) ) { 163 | if ( $el == 'rdf' ) { 164 | $this->feed_type = RSS; 165 | $this->feed_version = '1.0'; 166 | } 167 | elseif ( $el == 'rss' ) { 168 | $this->feed_type = RSS; 169 | if ( isset($attrs['version']) ) 170 | { 171 | $this->feed_version = $attrs['version']; 172 | } 173 | } 174 | elseif ( $el == 'feed' ) { 175 | $this->feed_type = ATOM; 176 | if ( isset($attrs['version']) ) 177 | { 178 | $this->feed_version = $attrs['version']; 179 | } 180 | $this->inchannel = true; 181 | } 182 | return; 183 | } 184 | 185 | if ( $el == 'channel' ) 186 | { 187 | $this->inchannel = true; 188 | } 189 | elseif ($el == 'item' or $el == 'entry' ) 190 | { 191 | $this->initem = true; 192 | if ( isset($attrs['rdf:about']) ) { 193 | $this->current_item['about'] = $attrs['rdf:about']; 194 | } 195 | } 196 | 197 | // if we're in the default namespace of an RSS feed, 198 | // record textinput or image fields 199 | elseif ( 200 | 
$this->feed_type == RSS and 201 | $this->current_namespace == '' and 202 | $el == 'textinput' ) 203 | { 204 | $this->intextinput = true; 205 | } 206 | 207 | elseif ( 208 | $this->feed_type == RSS and 209 | $this->current_namespace == '' and 210 | $el == 'image' ) 211 | { 212 | $this->inimage = true; 213 | } 214 | 215 | # handle atom content constructs 216 | elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 217 | { 218 | // avoid clashing w/ RSS mod_content 219 | if ($el == 'content' ) { 220 | $el = 'atom_content'; 221 | } 222 | 223 | $this->incontent = $el; 224 | 225 | 226 | } 227 | 228 | // if inside an Atom content construct (e.g. content or summary) field treat tags as text 229 | elseif ($this->feed_type == ATOM and $this->incontent ) 230 | { 231 | // if tags are inlined, then flatten 232 | $attrs_str = join(' ', 233 | array_map('map_attrs', 234 | array_keys($attrs), 235 | array_values($attrs) ) ); 236 | 237 | $this->append_content( "<$element $attrs_str>" ); 238 | 239 | array_unshift( $this->stack, $el ); 240 | } 241 | 242 | // Atom support many links per containging element. 243 | // Magpie treats link elements of type rel='alternate' 244 | // as being equivalent to RSS's simple link element. 245 | // 246 | elseif ($this->feed_type == ATOM and $el == 'link' ) 247 | { 248 | if ( isset($attrs['rel']) and $attrs['rel'] == 'alternate' ) 249 | { 250 | $link_el = 'link'; 251 | } 252 | else { 253 | $link_el = 'link_' . 
$attrs['rel']; 254 | } 255 | 256 | $this->append($link_el, $attrs['href']); 257 | } 258 | // set stack[0] to current element 259 | else { 260 | array_unshift($this->stack, $el); 261 | } 262 | } 263 | 264 | 265 | 266 | function feed_cdata ($p, $text) { 267 | if ($this->feed_type == ATOM and $this->incontent) 268 | { 269 | $this->append_content( $text ); 270 | } 271 | else { 272 | $current_el = join('_', array_reverse($this->stack)); 273 | $this->append($current_el, $text); 274 | } 275 | } 276 | 277 | function feed_end_element ($p, $el) { 278 | $el = strtolower($el); 279 | 280 | if ( $el == 'item' or $el == 'entry' ) 281 | { 282 | $this->items[] = $this->current_item; 283 | $this->current_item = array(); 284 | $this->initem = false; 285 | } 286 | elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' ) 287 | { 288 | $this->intextinput = false; 289 | } 290 | elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' ) 291 | { 292 | $this->inimage = false; 293 | } 294 | elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) ) 295 | { 296 | $this->incontent = false; 297 | } 298 | elseif ($el == 'channel' or $el == 'feed' ) 299 | { 300 | $this->inchannel = false; 301 | } 302 | elseif ($this->feed_type == ATOM and $this->incontent ) { 303 | // balance tags properly 304 | // note: i don't think this is actually neccessary 305 | if ( $this->stack[0] == $el ) 306 | { 307 | $this->append_content(""); 308 | } 309 | else { 310 | $this->append_content("<$el />"); 311 | } 312 | 313 | array_shift( $this->stack ); 314 | } 315 | else { 316 | array_shift( $this->stack ); 317 | } 318 | 319 | $this->current_namespace = false; 320 | } 321 | 322 | function concat (&$str1, $str2="") { 323 | if (!isset($str1) ) { 324 | $str1=""; 325 | } 326 | $str1 .= $str2; 327 | } 328 | 329 | 330 | 331 | function append_content($text) { 332 | if ( $this->initem ) { 333 | $this->concat( $this->current_item[ 
$this->incontent ], $text ); 334 | } 335 | elseif ( $this->inchannel ) { 336 | $this->concat( $this->channel[ $this->incontent ], $text ); 337 | } 338 | } 339 | 340 | // smart append - field and namespace aware 341 | function append($el, $text) { 342 | if (!$el) { 343 | return; 344 | } 345 | if ( $this->current_namespace ) 346 | { 347 | if ( $this->initem ) { 348 | $this->concat( 349 | $this->current_item[ $this->current_namespace ][ $el ], $text); 350 | } 351 | elseif ($this->inchannel) { 352 | $this->concat( 353 | $this->channel[ $this->current_namespace][ $el ], $text ); 354 | } 355 | elseif ($this->intextinput) { 356 | $this->concat( 357 | $this->textinput[ $this->current_namespace][ $el ], $text ); 358 | } 359 | elseif ($this->inimage) { 360 | $this->concat( 361 | $this->image[ $this->current_namespace ][ $el ], $text ); 362 | } 363 | } 364 | else { 365 | if ( $this->initem ) { 366 | $this->concat( 367 | $this->current_item[ $el ], $text); 368 | } 369 | elseif ($this->intextinput) { 370 | $this->concat( 371 | $this->textinput[ $el ], $text ); 372 | } 373 | elseif ($this->inimage) { 374 | $this->concat( 375 | $this->image[ $el ], $text ); 376 | } 377 | elseif ($this->inchannel) { 378 | $this->concat( 379 | $this->channel[ $el ], $text ); 380 | } 381 | 382 | } 383 | } 384 | 385 | function normalize () { 386 | // if atom populate rss fields 387 | if ( $this->is_atom() ) { 388 | $this->channel['description'] = $this->channel['tagline']; 389 | for ( $i = 0; $i < count($this->items); $i++) { 390 | $item = $this->items[$i]; 391 | if ( isset($item['summary']) ) 392 | $item['description'] = $item['summary']; 393 | if ( isset($item['atom_content'])) 394 | $item['content']['encoded'] = $item['atom_content']; 395 | 396 | $atom_date = (isset($item['issued']) ) ? 
$item['issued'] : $item['modified']; 397 | if ( $atom_date ) { 398 | $epoch = @parse_w3cdtf($atom_date); 399 | if ($epoch and $epoch > 0) { 400 | $item['date_timestamp'] = $epoch; 401 | } 402 | } 403 | 404 | $this->items[$i] = $item; 405 | } 406 | } 407 | elseif ( $this->is_rss() ) { 408 | $this->channel['tagline'] = $this->channel['description']; 409 | for ( $i = 0; $i < count($this->items); $i++) { 410 | $item = $this->items[$i]; 411 | if ( isset($item['description'])) 412 | $item['summary'] = $item['description']; 413 | if ( isset($item['content']['encoded'] ) ) 414 | $item['atom_content'] = $item['content']['encoded']; 415 | 416 | if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) { 417 | $epoch = @parse_w3cdtf($item['dc']['date']); 418 | if ($epoch and $epoch > 0) { 419 | $item['date_timestamp'] = $epoch; 420 | } 421 | } 422 | elseif ( isset($item['pubdate']) ) { 423 | $epoch = @strtotime($item['pubdate']); 424 | if ($epoch > 0) { 425 | $item['date_timestamp'] = $epoch; 426 | } 427 | } 428 | 429 | $this->items[$i] = $item; 430 | } 431 | } 432 | } 433 | 434 | 435 | function is_rss () { 436 | if ( $this->feed_type == RSS ) { 437 | return $this->feed_version; 438 | } 439 | else { 440 | return false; 441 | } 442 | } 443 | 444 | function is_atom() { 445 | if ( $this->feed_type == ATOM ) { 446 | return $this->feed_version; 447 | } 448 | else { 449 | return false; 450 | } 451 | } 452 | 453 | /** 454 | * return XML parser, and possibly re-encoded source 455 | * 456 | */ 457 | function create_parser($source, $out_enc, $in_enc, $detect) { 458 | if ( substr(phpversion(),0,1) == 5) { 459 | $parser = $this->php5_create_parser($in_enc, $detect); 460 | } 461 | else { 462 | list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect); 463 | } 464 | if ($out_enc) { 465 | $this->encoding = $out_enc; 466 | xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc); 467 | } 468 | 469 | return array($parser, $source); 470 | } 471 | 472 | /** 
473 | * Instantiate an XML parser under PHP5 474 | * 475 | * PHP5 will do a fine job of detecting input encoding 476 | * if passed an empty string as the encoding. 477 | * 478 | * All hail libxml2! 479 | * 480 | */ 481 | function php5_create_parser($in_enc, $detect) { 482 | // by default php5 does a fine job of detecting input encodings 483 | if(!$detect && $in_enc) { 484 | return xml_parser_create($in_enc); 485 | } 486 | else { 487 | return xml_parser_create(''); 488 | } 489 | } 490 | 491 | /** 492 | * Instaniate an XML parser under PHP4 493 | * 494 | * Unfortunately PHP4's support for character encodings 495 | * and especially XML and character encodings sucks. As 496 | * long as the documents you parse only contain characters 497 | * from the ISO-8859-1 character set (a superset of ASCII, 498 | * and a subset of UTF-8) you're fine. However once you 499 | * step out of that comfy little world things get mad, bad, 500 | * and dangerous to know. 501 | * 502 | * The following code is based on SJM's work with FoF 503 | * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss 504 | * 505 | */ 506 | function php4_create_parser($source, $in_enc, $detect) { 507 | if ( !$detect ) { 508 | return array(xml_parser_create($in_enc), $source); 509 | } 510 | 511 | if (!$in_enc) { 512 | if (preg_match('//m', $source, $m)) { 513 | $in_enc = strtoupper($m[1]); 514 | $this->source_encoding = $in_enc; 515 | } 516 | else { 517 | $in_enc = 'UTF-8'; 518 | } 519 | } 520 | 521 | if ($this->known_encoding($in_enc)) { 522 | return array(xml_parser_create($in_enc), $source); 523 | } 524 | 525 | // the dectected encoding is not one of the simple encodings PHP knows 526 | 527 | // attempt to use the iconv extension to 528 | // cast the XML to a known encoding 529 | // @see http://php.net/iconv 530 | 531 | if (function_exists('iconv')) { 532 | $encoded_source = iconv($in_enc,'UTF-8', $source); 533 | if ($encoded_source) { 534 | 
return array(xml_parser_create('UTF-8'), $encoded_source); 535 | } 536 | } 537 | 538 | // iconv didn't work, try mb_convert_encoding 539 | // @see http://php.net/mbstring 540 | if(function_exists('mb_convert_encoding')) { 541 | $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc ); 542 | if ($encoded_source) { 543 | return array(xml_parser_create('UTF-8'), $encoded_source); 544 | } 545 | } 546 | 547 | // else 548 | $this->error("Feed is in an unsupported character encoding. ($in_enc) " . 549 | "You may see strange artifacts, and mangled characters.", 550 | E_USER_NOTICE); 551 | 552 | return array(xml_parser_create(), $source); 553 | } 554 | 555 | function known_encoding($enc) { 556 | $enc = strtoupper($enc); 557 | if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) { 558 | return $enc; 559 | } 560 | else { 561 | return false; 562 | } 563 | } 564 | 565 | function error ($errormsg, $lvl=E_USER_WARNING) { 566 | // append PHP's error message if track_errors enabled 567 | if ( isset($php_errormsg) ) { 568 | $errormsg .= " ($php_errormsg)"; 569 | } 570 | if ( MAGPIE_DEBUG ) { 571 | trigger_error( $errormsg, $lvl); 572 | } 573 | else { 574 | error_log( $errormsg, 0); 575 | } 576 | 577 | $notices = E_USER_NOTICE|E_NOTICE; 578 | if ( $lvl&$notices ) { 579 | $this->WARNING = $errormsg; 580 | } else { 581 | $this->ERROR = $errormsg; 582 | } 583 | } 584 | 585 | 586 | } // end class RSS 587 | 588 | function map_attrs($k, $v) { 589 | return "$k=\"$v\""; 590 | } 591 | 592 | // patch to support medieval versions of PHP4.1.x, 593 | // courtesy, Ryan Currie, ryan@digibliss.com 594 | 595 | if (!function_exists('array_change_key_case')) { 596 | define("CASE_UPPER",1); 597 | define("CASE_LOWER",0); 598 | 599 | 600 | function array_change_key_case($array,$case=CASE_LOWER) { 601 | if ($case=CASE_LOWER) $cmd=strtolower; 602 | elseif ($case=CASE_UPPER) $cmd=strtoupper; 603 | foreach($array as $key=>$value) { 604 | $output[$cmd($key)]=$value; 605 | } 606 | return $output; 607 | } 608 
| 609 | } 610 | 611 | ?> 612 | -------------------------------------------------------------------------------- /magpie/extlib/Snoopy.class.inc: -------------------------------------------------------------------------------- 1 | 7 | Copyright (c): 1999-2000 ispi, all rights reserved 8 | Version: 1.0 9 | 10 | * This library is free software; you can redistribute it and/or 11 | * modify it under the terms of the GNU Lesser General Public 12 | * License as published by the Free Software Foundation; either 13 | * version 2.1 of the License, or (at your option) any later version. 14 | * 15 | * This library is distributed in the hope that it will be useful, 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 | * Lesser General Public License for more details. 19 | * 20 | * You should have received a copy of the GNU Lesser General Public 21 | * License along with this library; if not, write to the Free Software 22 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23 | 24 | You may contact the author of Snoopy by e-mail at: 25 | monte@ispi.net 26 | 27 | Or, write to: 28 | Monte Ohrt 29 | CTO, ispi 30 | 237 S. 
70th suite 220 31 | Lincoln, NE 68510 32 | 33 | The latest version of Snoopy can be obtained from: 34 | http://snoopy.sourceforge.com 35 | 36 | *************************************************/ 37 | 38 | class Snoopy 39 | { 40 | /**** Public variables ****/ 41 | 42 | /* user definable vars */ 43 | 44 | var $host = "www.php.net"; // host name we are connecting to 45 | var $port = 80; // port we are connecting to 46 | var $proxy_host = ""; // proxy host to use 47 | var $proxy_port = ""; // proxy port to use 48 | var $agent = "Snoopy v1.0"; // agent we masquerade as 49 | var $referer = ""; // referer info to pass 50 | var $cookies = array(); // array of cookies to pass 51 | // $cookies["username"]="joe"; 52 | var $rawheaders = array(); // array of raw headers to send 53 | // $rawheaders["Content-type"]="text/html"; 54 | 55 | var $maxredirs = 5; // http redirection depth maximum. 0 = disallow 56 | var $lastredirectaddr = ""; // contains address of last redirected address 57 | var $offsiteok = true; // allows redirection off-site 58 | var $maxframes = 0; // frame content depth maximum. 0 = disallow 59 | var $expandlinks = true; // expand links to fully qualified URLs. 60 | // this only applies to fetchlinks() 61 | // or submitlinks() 62 | var $passcookies = true; // pass set cookies back through redirects 63 | // NOTE: this currently does not respect 64 | // dates, domains or paths. 
65 | 66 | var $user = ""; // user for http authentication 67 | var $pass = ""; // password for http authentication 68 | 69 | // http accept types 70 | var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; 71 | 72 | var $results = ""; // where the content is put 73 | 74 | var $error = ""; // error messages sent here 75 | var $response_code = ""; // response code returned from server 76 | var $headers = array(); // headers returned from server sent here 77 | var $maxlength = 500000; // max return data length (body) 78 | var $read_timeout = 0; // timeout on read operations, in seconds 79 | // supported only since PHP 4 Beta 4 80 | // set to 0 to disallow timeouts 81 | var $timed_out = false; // if a read operation timed out 82 | var $status = 0; // http request status 83 | 84 | var $curl_path = "/usr/bin/curl"; 85 | // Snoopy will use cURL for fetching 86 | // SSL content if a full system path to 87 | // the cURL binary is supplied here. 88 | // set to false if you do not have 89 | // cURL installed. See http://curl.haxx.se 90 | // for details on installing cURL. 91 | // Snoopy does *not* use the cURL 92 | // library functions built into php, 93 | // as these functions are not stable 94 | // as of this Snoopy release. 95 | 96 | // send Accept-encoding: gzip? 
97 | var $use_gzip = true; 98 | 99 | /**** Private variables ****/ 100 | 101 | var $_maxlinelen = 4096; // max line length (headers) 102 | 103 | var $_httpmethod = "GET"; // default http request method 104 | var $_httpversion = "HTTP/1.0"; // default http request version 105 | var $_submit_method = "POST"; // default submit method 106 | var $_submit_type = "application/x-www-form-urlencoded"; // default submit type 107 | var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type 108 | var $_redirectaddr = false; // will be set if page fetched is a redirect 109 | var $_redirectdepth = 0; // increments on an http redirect 110 | var $_frameurls = array(); // frame src urls 111 | var $_framedepth = 0; // increments on frame depth 112 | 113 | var $_isproxy = false; // set if using a proxy server 114 | var $_fp_timeout = 30; // timeout for socket connection 115 | 116 | /*======================================================================*\ 117 | Function: fetch 118 | Purpose: fetch the contents of a web page 119 | (and possibly other protocols in the 120 | future like ftp, nntp, gopher, etc.) 
121 | Input: $URI the location of the page to fetch 122 | Output: $this->results the output text from the fetch 123 | \*======================================================================*/ 124 | 125 | function fetch($URI) 126 | { 127 | 128 | //preg_match("|^([^:]+)://([^:/]+)(:[\d]+)*(.*)|",$URI,$URI_PARTS); 129 | $URI_PARTS = parse_url($URI); 130 | if (!empty($URI_PARTS["user"])) 131 | $this->user = $URI_PARTS["user"]; 132 | if (!empty($URI_PARTS["pass"])) 133 | $this->pass = $URI_PARTS["pass"]; 134 | 135 | switch($URI_PARTS["scheme"]) 136 | { 137 | case "http": 138 | $this->host = $URI_PARTS["host"]; 139 | if(!empty($URI_PARTS["port"])) 140 | $this->port = $URI_PARTS["port"]; 141 | if($this->_connect($fp)) 142 | { 143 | if($this->_isproxy) 144 | { 145 | // using proxy, send entire URI 146 | $this->_httprequest($URI,$fp,$URI,$this->_httpmethod); 147 | } 148 | else 149 | { 150 | $path = $URI_PARTS["path"].(isset($URI_PARTS["query"]) ? "?".$URI_PARTS["query"] : ""); 151 | // no proxy, send only the path 152 | $this->_httprequest($path, $fp, $URI, $this->_httpmethod); 153 | } 154 | 155 | $this->_disconnect($fp); 156 | 157 | if($this->_redirectaddr) 158 | { 159 | /* url was redirected, check if we've hit the max depth */ 160 | if($this->maxredirs > $this->_redirectdepth) 161 | { 162 | // only follow redirect if it's on this site, or offsiteok is true 163 | if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) 164 | { 165 | /* follow the redirect */ 166 | $this->_redirectdepth++; 167 | $this->lastredirectaddr=$this->_redirectaddr; 168 | $this->fetch($this->_redirectaddr); 169 | } 170 | } 171 | } 172 | 173 | if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) 174 | { 175 | $frameurls = $this->_frameurls; 176 | $this->_frameurls = array(); 177 | 178 | while(list(,$frameurl) = each($frameurls)) 179 | { 180 | if($this->_framedepth < $this->maxframes) 181 | { 182 | $this->fetch($frameurl); 183 | 
$this->_framedepth++; 184 | } 185 | else 186 | break; 187 | } 188 | } 189 | } 190 | else 191 | { 192 | return false; 193 | } 194 | return true; 195 | break; 196 | case "https": 197 | if(!$this->curl_path || (!is_executable($this->curl_path))) { 198 | $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n"; 199 | return false; 200 | } 201 | $this->host = $URI_PARTS["host"]; 202 | if(!empty($URI_PARTS["port"])) 203 | $this->port = $URI_PARTS["port"]; 204 | if($this->_isproxy) 205 | { 206 | // using proxy, send entire URI 207 | $this->_httpsrequest($URI,$URI,$this->_httpmethod); 208 | } 209 | else 210 | { 211 | $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : ""); 212 | // no proxy, send only the path 213 | $this->_httpsrequest($path, $URI, $this->_httpmethod); 214 | } 215 | 216 | if($this->_redirectaddr) 217 | { 218 | /* url was redirected, check if we've hit the max depth */ 219 | if($this->maxredirs > $this->_redirectdepth) 220 | { 221 | // only follow redirect if it's on this site, or offsiteok is true 222 | if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok) 223 | { 224 | /* follow the redirect */ 225 | $this->_redirectdepth++; 226 | $this->lastredirectaddr=$this->_redirectaddr; 227 | $this->fetch($this->_redirectaddr); 228 | } 229 | } 230 | } 231 | 232 | if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) 233 | { 234 | $frameurls = $this->_frameurls; 235 | $this->_frameurls = array(); 236 | 237 | while(list(,$frameurl) = each($frameurls)) 238 | { 239 | if($this->_framedepth < $this->maxframes) 240 | { 241 | $this->fetch($frameurl); 242 | $this->_framedepth++; 243 | } 244 | else 245 | break; 246 | } 247 | } 248 | return true; 249 | break; 250 | default: 251 | // not a valid protocol 252 | $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"\n'; 253 | return false; 254 | break; 255 | } 256 | return true; 257 | } 258 | 259 | 260 | 261 | 
/*======================================================================*\ 262 | Private functions 263 | \*======================================================================*/ 264 | 265 | 266 | /*======================================================================*\ 267 | Function: _striplinks 268 | Purpose: strip the hyperlinks from an html document 269 | Input: $document document to strip. 270 | Output: $match an array of the links 271 | \*======================================================================*/ 272 | 273 | function _striplinks($document) 274 | { 275 | preg_match_all("'<\s*a\s+.*href\s*=\s* # find ]+)) # if quote found, match up to next matching 278 | # quote, otherwise match up to next space 279 | 'isx",$document,$links); 280 | 281 | 282 | // catenate the non-empty matches from the conditional subpattern 283 | 284 | while(list($key,$val) = each($links[2])) 285 | { 286 | if(!empty($val)) 287 | $match[] = $val; 288 | } 289 | 290 | while(list($key,$val) = each($links[3])) 291 | { 292 | if(!empty($val)) 293 | $match[] = $val; 294 | } 295 | 296 | // return the links 297 | return $match; 298 | } 299 | 300 | /*======================================================================*\ 301 | Function: _stripform 302 | Purpose: strip the form elements from an html document 303 | Input: $document document to strip. 
Output:     $match      the matched form tags (FORM, INPUT, SELECT,
                        TEXTAREA, OPTION) as one string, joined
                        with CRLF
\*======================================================================*/

    function _stripform($document)
    {
        preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);

        // catenate the matches ($elements[0] holds the full match of each hit)
        $match = implode("\r\n",$elements[0]);

        // return the form elements
        return $match;
    }



/*======================================================================*\
    Function:   _striptext
    Purpose:    strip the text from an html document
    Input:      $document   document to strip.
    Output:     $text       the resulting text: script blocks and tags
                            removed, whitespace after a line break
                            collapsed, a fixed list of entities decoded
\*======================================================================*/

    function _striptext($document)
    {

        // I didn't use preg eval (//e) since that is only available in PHP 4.0.
        // so, list your entities one by one here. I included some of the
        // more common ones.

        // $search and $replace are parallel arrays: entry N of $replace is
        // substituted for whatever pattern N of $search matches.
        $search = array("'<script[^>]*?>.*?</script>'si",   // strip out javascript
                        "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                        "'([\r\n])[\s]+'",                  // strip out white space
                        "'&(quote?|#34);'i",                // replace html entities (the real
                                                            // entity is &quot;, the historical
                                                            // misspelling is still accepted)
                        "'&(amp|#38);'i",
                        "'&(lt|#60);'i",
                        "'&(gt|#62);'i",
                        "'&(nbsp|#160);'i",
                        "'&(iexcl|#161);'i",
                        "'&(cent|#162);'i",
                        "'&(pound|#163);'i",
                        "'&(copy|#169);'i"
                        );
        $replace = array(   "",
                            "",
                            "\\1",
                            "\"",
                            "&",
                            "<",
                            ">",
                            " ",
                            chr(161),
                            chr(162),
                            chr(163),
                            chr(169));

        $text = preg_replace($search,$replace,$document);

        return $text;
    }

/*======================================================================*\
    Function:   _expandlinks
    Purpose:    expand each link into a fully qualified URL
    Input:      $links          the links to qualify (string or array)
                $URI            the full URI to get the base from
    Output:     $expandedLinks  the expanded links
\*======================================================================*/

    function _expandlinks($links,$URI)
    {
        // base = $URI without its query string ...
        $base = "";
        if(preg_match("/^[^\?]+/",$URI,$match))
        {
            // ... and without a trailing "/name.ext" file component
            $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$match[0]);
        }

        // applied in order to every link:
        //  1. drop a leading http://<current host>
        //  2. prefix the base to anything not starting with http:// or mailto:
        //  3. collapse "/./"
        //  4. collapse "/dir/../"
        $search = array(    "|^http://".preg_quote($this->host,"|")."|i",
                            "|^(?!http://)(\/)?(?!mailto:)|i",
                            "|/\./|",
                            "|/[^\/]+/\.\./|"
                        );

        $replace = array(   "",
                            $base."/",
                            "/",
                            "/"
                        );

        $expandedLinks = preg_replace($search,$replace,$links);

        return $expandedLinks;
    }

/*======================================================================*\
    Function:   _httprequest
    Purpose:    go get the http data from the server
    Input:      $url        the url to fetch
                $fp         the current open file pointer
                $URI        the full URI
                $body       body contents to send if any (POST)
    Output:
\*======================================================================*/

    /*
     * _httprequest: writes one HTTP request to $fp and reads the response.
     *
     * $http_method and $content_type are sent verbatim in the request line
     * and the Content-type header. Side effects on $this: status,
     * response_code, headers, results, _redirectaddr, _frameurls, timed_out.
     * Returns true on success, false (with status -100) on a read timeout.
     */
    function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
    {
        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        if(empty($url))
            $url = "/";
        $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
        if(!empty($this->agent))
            $headers .= "User-Agent: ".$this->agent."\r\n";
        if(!empty($this->host) && !isset($this->rawheaders['Host']))
            $headers .= "Host: ".$this->host."\r\n";
        if(!empty($this->accept))
            $headers .= "Accept: ".$this->accept."\r\n";

        if($this->use_gzip) {
            // make sure PHP was built with --with-zlib
            // and we can handle gzipp'ed data
            // (the function name must be a string; a bare word is an
            // undefined constant)
            if ( function_exists('gzinflate') ) {
                $headers .= "Accept-encoding: gzip\r\n";
            }
            else {
                trigger_error(
                    "use_gzip is on, but PHP was built without zlib support.".
                    "  Requesting file(s) without gzip encoding.",
                    E_USER_NOTICE);
            }
        }

        if(!empty($this->referer))
            $headers .= "Referer: ".$this->referer."\r\n";
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            // one "Cookie:" line, pairs separated by "; "
            $cookie_headers = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey.": ".$headerVal."\r\n";
        }
        if(!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=".$this->_mime_boundary;
            $headers .= "\r\n";
        }
        if(!empty($body))
            $headers .= "Content-length: ".strlen($body)."\r\n";
        if(!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            socket_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp,$headers.$body,strlen($headers.$body));

        $this->_redirectaddr = false;
        // reset to an empty array (not unset) so later count()/foreach on
        // the headers is safe even when the server sends none
        $this->headers = array();

        // content was returned gzip encoded?
        $is_gzipped = false;

        while($currentHeader = fgets($fp,$this->_maxlinelen))
        {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp))
            {
                $this->status=-100;
                return false;
            }

            // a blank line ends the header section
            if(preg_match("/^\r?\n$/", $currentHeader) )
                break;

            // if a header begins with Location: or URI:, set the redirect
            // (header names are case-insensitive; one match both detects the
            // header and extracts its value)
            if(preg_match("/^(Location:|URI:)\s*(.+)/i",chop($currentHeader),$matches))
            {
                // look for :// in the Location header to see if hostname is included
                if(!preg_match("|\:\/\/|",$matches[2]))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if(!preg_match("|^/|",$matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            if(preg_match("|^HTTP/|",$currentHeader))
            {
                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
                {
                    $this->status= $status[1];
                }
                $this->response_code = $currentHeader;
            }

            if (preg_match("/^Content-Encoding:\s*gzip/i", $currentHeader) ) {
                $is_gzipped = true;
            }

            $this->headers[] = $currentHeader;
        }

        // read the body in chunks, stopping once more than maxlength bytes
        // have been collected
        $results = "";
        while ( $data = fread($fp, $this->maxlength) ) {
            $results .= $data;
            if ( strlen($results) > $this->maxlength ) {
                break;
            }
        }

        // gunzip
        if ( $is_gzipped ) {
            // per http://www.php.net/manual/en/function.gzencode.php
            // skip the 10-byte gzip header, then inflate the raw stream
            $results = substr($results, 10);
            $results = gzinflate($results);
        }

        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // check if there is a a redirect meta tag

        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            // results becomes a list of documents once frames are involved
            if(!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            foreach($match[1] as $frame_src)
                $this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:   _httpsrequest
    Purpose:    go get the https data from the server using curl
    Input:      $url            the url to fetch (not used: curl is
                                always handed the full $URI)
                $URI            the full URI
                $http_method    not used by this function
                $content_type   value for the Content-type header
                $body           body contents to send if any (POST)
    Output:     true on success, false (with $this->error set) when
                curl exits non-zero or no header file can be created
\*======================================================================*/

    function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
    {
        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $headers = array();

        $URI_PARTS = parse_url($URI);

        // GET ... header not needed for curl
        if(!empty($this->agent))
            $headers[] = "User-Agent: ".$this->agent;
        if(!empty($this->host))
            $headers[] = "Host: ".$this->host;
        if(!empty($this->accept))
            $headers[] = "Accept: ".$this->accept;
        if(!empty($this->referer))
            $headers[] = "Referer: ".$this->referer;
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey.": ".$headerVal;
        }
        if(!empty($content_type)) {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if(!empty($body))
            $headers[] = "Content-length: ".strlen($body);
        if(!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

        // Every value placed on the command line goes through
        // escapeshellarg(): the URI, headers and body can carry remote or
        // user-supplied text, and hand-written quotes plus escapeshellcmd()
        // do not stop it from breaking out of its argument.
        $cmdline_params = "";
        foreach($headers as $header)
            $cmdline_params .= " -H ".escapeshellarg($header);

        if(!empty($body))
            $cmdline_params .= " -d ".escapeshellarg($body);

        if($this->read_timeout > 0)
            $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

        // curl dumps the response headers here; tempnam() creates the file
        // itself, so the name cannot be guessed and pre-created by others
        $headerfile = tempnam("/tmp", "sno");
        if($headerfile === false)
        {
            $this->error = "Error: could not create a temporary file for the cURL headers.";
            return false;
        }

        # accept self-signed certs
        $cmdline_params .= " -k";
        exec(escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

        if($return)
        {
            // do not leave the header file behind on failure
            if(file_exists($headerfile))
                unlink($headerfile);
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            return false;
        }


        $results = implode("\r\n",$results);

        $result_headers = file($headerfile);
        if(file_exists($headerfile))
            unlink($headerfile);
        if(!is_array($result_headers))
            $result_headers = array();

        $this->_redirectaddr = false;
        // reset to an empty array (not unset) so later count()/foreach on
        // the headers is safe even when curl wrote none
        $this->headers = array();

        foreach($result_headers as $currentHeader)
        {

            // if a header begins with Location: or URI:, set the redirect
            // (header names are case-insensitive; one match both detects the
            // header and extracts its value)
            if(preg_match("/^(Location:|URI:)\s*(.+)/i",chop($currentHeader),$matches))
            {
                // look for :// in the Location header to see if hostname is included
                if(!preg_match("|\:\/\/|",$matches[2]))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if(!preg_match("|^/|",$matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            if(preg_match("|^HTTP/|",$currentHeader))
            {
                $this->response_code = $currentHeader;
                if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
                {
                    $this->status= $match[1];
                }
            }
            $this->headers[] = $currentHeader;
        }

        // check if there is a a redirect meta tag

        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            // results becomes a list of documents once frames are involved
            if(!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            foreach($match[1] as $frame_src)
                $this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection: copy every Set-Cookie
                name/value pair found in $this->headers into
                $this->cookies
\*======================================================================*/

    function setcookies()
    {
        if(empty($this->headers) || !is_array($this->headers))
            return;

        foreach($this->headers as $header)
        {
            // chop() first: stored header lines still carry their CRLF,
            // which would otherwise end up inside a cookie value that has
            // no trailing ";"
            if(preg_match("/^set-cookie:[\s]+([^=]+)=([^;]+)/i", chop($header),$match))
                $this->cookies[$match[1]] = $match[2];
        }
    }


/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred
    Input:      $fp file pointer
    Output:     true (and $this->timed_out set) when read_timeout is
                enabled and the stream reports a timeout, else false
\*======================================================================*/

    function _check_timeout($fp)
    {
        if ($this->read_timeout > 0) {
            $fp_status = socket_get_status($fp);
            if ($fp_status["timed_out"]) {
                $this->timed_out = true;
                return true;
            }
        }
        return false;
    }

/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection, to the proxy when both
                proxy_host and proxy_port are set, otherwise to
                host:port
    Input:      $fp file pointer (by reference; receives the socket)
    Output:     true on success; false on failure, with $this->status
                set to the error number and $this->error to a message
\*======================================================================*/

    function _connect(&$fp)
    {
        if(!empty($this->proxy_host) && !empty($this->proxy_port))
        {
            $this->_isproxy = true;
            $host = $this->proxy_host;
            $port = $this->proxy_port;
        }
        else
        {
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        $fp = fsockopen(
                    $host,
                    $port,
                    $errno,
                    $errstr,
                    $this->_fp_timeout
                    );
        if($fp)
        {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        // each case needs its own break, otherwise every error falls
        // through to the generic default message
        switch($errno)
        {
            case -3:
                $this->error="socket creation failed (-3)";
                break;
            case -4:
                $this->error="dns lookup failure (-4)";
                break;
            case -5:
                $this->error="connection refused or timed out (-5)";
                break;
            default:
                $this->error="connection failed (".$errno.")";
                break;
        }
        return false;
    }
/*======================================================================*\
    Function:   _disconnect
    Purpose:    disconnect a socket connection
    Input:      $fp file pointer
    Output:     the result of fclose()
\*======================================================================*/

    function _disconnect($fp)
    {
        return(fclose($fp));
    }


/*======================================================================*\
    Function:   _prepare_post_body
    Purpose:    Prepare post body according to encoding type
                ($this->_submit_type)
    Input:      $formvars  - form variables
                $formfiles - form upload files (field name => file
                             name or list of file names; only used for
                             multipart/form-data)
    Output:     post body; nothing when both inputs are empty
\*======================================================================*/

    function _prepare_post_body($formvars, $formfiles)
    {
        settype($formvars, "array");
        settype($formfiles, "array");

        if (count($formvars) == 0 && count($formfiles) == 0)
            return;

        $postdata = "";

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                foreach($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        // multi-valued field: repeat it as key[]=value
                        foreach($val as $cur_val) {
                            $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                        }
                    } else
                        $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                foreach($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach($val as $cur_val) {
                            $postdata .= "--".$this->_mime_boundary."\r\n";
                            // {$key}[] and not $key\[\]: inside double
                            // quotes "\[" keeps its backslash, which would
                            // be sent as part of the field name
                            $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach($formfiles as $field_name => $file_names) {
                    settype($file_names, "array");
                    foreach($file_names as $file_name) {
                        if (!is_readable($file_name)) continue;

                        // file_get_contents() also copes with empty files,
                        // where fread() with a zero length fails
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false) continue;
                        $base_name = basename($file_name);

                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                // closing boundary
                $postdata .= "--".$this->_mime_boundary."--\r\n";
                break;
        }

        return $postdata;
    }
}

?>

--------------------------------------------------------------------------------