Browse Source
- Update language detection
- Update language detection
- Add Deutsch language - Remove useless library : MagpieRSSpull/5/head
26 changed files with 147 additions and 4208 deletions
-
144i18n/de.po
-
0i18n/fr.po
-
14lib/MagpieRSS.php
-
1lib/MagpieRSS/AUTHORS
-
41lib/MagpieRSS/CHANGES
-
405lib/MagpieRSS/ChangeLog
-
143lib/MagpieRSS/INSTALL
-
53lib/MagpieRSS/NEWS
-
48lib/MagpieRSS/README
-
152lib/MagpieRSS/TROUBLESHOOTING
-
125lib/MagpieRSS/cookbook
-
900lib/MagpieRSS/extlib/Snoopy.class.inc
-
237lib/MagpieRSS/htdocs/cookbook.html
-
419lib/MagpieRSS/htdocs/index.html
-
200lib/MagpieRSS/rss_cache.inc
-
460lib/MagpieRSS/rss_fetch.inc
-
605lib/MagpieRSS/rss_parse.inc
-
67lib/MagpieRSS/rss_utils.inc
-
27lib/MagpieRSS/scripts/README
-
80lib/MagpieRSS/scripts/magpie_debug.php
-
29lib/MagpieRSS/scripts/magpie_simple.php
-
66lib/MagpieRSS/scripts/magpie_slashbox.php
-
58lib/MagpieRSS/scripts/simple_smarty.php
-
31lib/MagpieRSS/scripts/smarty_plugin/modifier.rss_date_parse.php
-
46lib/MagpieRSS/scripts/templates/simple.smarty
-
4lib/PageBuilder.php
@ -0,0 +1,144 @@ |
|||
# German translations for Movim package. |
|||
# This file is distributed under the same license as the Movim package. |
|||
# This file was translated from CodingTeam at <http://codingteam.net/>. |
|||
msgid "" |
|||
msgstr "" |
|||
"Project-Id-Version: Movim \n" |
|||
"PO-Revision-Date: 2011-01-27 12:17:48\n" |
|||
"MIME-Version: 1.0\n" |
|||
"Content-Type: text/plain; charset=UTF-8\n" |
|||
"Content-Transfer-Encoding: 8bit\n" |
|||
|
|||
#: ../init.php:16 |
|||
msgid "MOVIM - Test Client" |
|||
msgstr "MOVIM – Test-Instanz" |
|||
|
|||
#: ../lib/Dispatcher.php:38 |
|||
msgid "%s - Welcome to Movim" |
|||
msgstr "%s – Willkommen bei Movim" |
|||
|
|||
#: ../lib/Dispatcher.php:3 |
|||
#: ../lib/Dispatcher.php:5 |
|||
#: ../lib/Dispatcher.php:72 |
|||
msgid "Home" |
|||
msgstr "Start" |
|||
|
|||
#: ../lib/Dispatcher.php:4 |
|||
#: ../lib/Dispatcher.php:58 |
|||
msgid "Configuration" |
|||
msgstr "Einstellungen" |
|||
|
|||
#: ../lib/Dispatcher.php:4 |
|||
#: ../lib/Dispatcher.php:59 |
|||
msgid "Logout" |
|||
msgstr "Abmelden" |
|||
|
|||
#: ../lib/Dispatcher.php:56 |
|||
msgid "%s - Configuration" |
|||
msgstr "%s – Einstellungen" |
|||
|
|||
#: ../lib/Dispatcher.php:71 |
|||
msgid "%s - Account Creation" |
|||
msgstr "%s – Registrieren" |
|||
|
|||
#: ../lib/Dispatcher.php:88 |
|||
msgid "%s - Login to Movim" |
|||
msgstr "%s – Anmelden" |
|||
|
|||
#: ../lib/Dispatcher.php:91 |
|||
msgid "Account Creation" |
|||
msgstr "Registrieren" |
|||
|
|||
#: ../lib/Dispatcher.php:95 |
|||
msgid "My address" |
|||
msgstr "Adresse" |
|||
|
|||
#: ../lib/Dispatcher.php:9 |
|||
#: ../lib/widgets/Account.php:35 |
|||
msgid "Password" |
|||
msgstr "Passwort" |
|||
|
|||
#: ../lib/Dispatcher.php:97 |
|||
msgid "Come in!" |
|||
msgstr "Komm herein!" |
|||
|
|||
#: ../lib/GetConf.php:2 |
|||
#: ../lib/GetConf.php:41 |
|||
msgid "Error: Cannot load element value'%s'" |
|||
msgstr "Fehler: Elementwert »%s« konnte nicht geladen werden" |
|||
|
|||
#: ../lib/GetConf.php:59 |
|||
msgid "Error: Cannot load file '%s'" |
|||
msgstr "Fehler: Datei »%s« konnte nicht geladen werden" |
|||
|
|||
#: ../lib/PageBuilder.php:283 |
|||
msgid "Error: Requested widget '%s' doesn't exist." |
|||
msgstr "Fehler: Angefordertes Widget »%s« ist nicht vorhanden." |
|||
|
|||
#: ../lib/User.php:63 |
|||
msgid "Wrong password" |
|||
msgstr "Passwort falsch" |
|||
|
|||
#: ../lib/Widget.php:69 |
|||
msgid "This is a sample widget." |
|||
msgstr "Dies ist ein Beispiel-Widget." |
|||
|
|||
#: ../lib/XMPPConnect.php:147 |
|||
msgid "Error: jid `%s' is incorrect" |
|||
msgstr "Fehler: Falsche jid »%s«" |
|||
|
|||
#: ../lib/widgets/Account.php:3 |
|||
#: ../lib/widgets/Account.php:34 |
|||
msgid "Jabber Account" |
|||
msgstr "Jabber-Konto" |
|||
|
|||
#: ../lib/widgets/Account.php:4 |
|||
#: ../lib/widgets/Config.php:84 |
|||
msgid "Submit" |
|||
msgstr "Übernehmen" |
|||
|
|||
#: ../lib/widgets/Account.php:4 |
|||
#: ../lib/widgets/Config.php:86 |
|||
msgid "Reset" |
|||
msgstr "Zurücksetzen" |
|||
|
|||
#: ../lib/widgets/Chat.php:40 |
|||
msgid "Send" |
|||
msgstr "Absenden" |
|||
|
|||
#: ../lib/widgets/Config.php:39 |
|||
msgid "Language" |
|||
msgstr "Sprache" |
|||
|
|||
#: ../lib/widgets/Config.php:45 |
|||
msgid "Full Name" |
|||
msgstr "Name" |
|||
|
|||
#: ../lib/widgets/Config.php:47 |
|||
msgid "BOSH Connection Prefrences" |
|||
msgstr "Verbindungseinstellungen" |
|||
|
|||
#: ../lib/widgets/Config.php:49 |
|||
msgid "Changing these data can be dangerous and may compromise the connection to the XMPP server" |
|||
msgstr "Eine Änderung dieser Daten kann gefährlich sein und die Verbindung zum XMPP-Server behindern." |
|||
|
|||
#: ../lib/widgets/Config.php:51 |
|||
msgid "Bosh Host" |
|||
msgstr "Server" |
|||
|
|||
#: ../lib/widgets/Config.php:53 |
|||
msgid "Bosh Suffix" |
|||
msgstr "Suffix" |
|||
|
|||
#: ../lib/widgets/Config.php:55 |
|||
msgid "Bosh Port" |
|||
msgstr "Port" |
|||
|
|||
#: ../lib/widgets/Config.php:90 |
|||
msgid "error: " |
|||
msgstr "Fehler: " |
|||
|
|||
#: ../lib/widgets/Friends.php:65 |
|||
msgid "Contacts" |
|||
msgstr "Freunde" |
|||
|
@ -1,14 +0,0 @@ |
|||
<?php |
|||
class Parser |
|||
{ |
|||
function __construct () |
|||
{ |
|||
require_once LIB_PATH.'MagpieRSS/rss_fetch.inc'; |
|||
} |
|||
|
|||
function fetch($url) |
|||
{ |
|||
return fetch_rss($url); |
|||
} |
|||
|
|||
} |
@ -1 +0,0 @@ |
|||
kellan <kellan@protest.net> |
@ -1,41 +0,0 @@ |
|||
Version 0.72 |
|||
----------- |
|||
- fix security exploit: http://www.sec-consult.com/216.html |
|||
|
|||
Version 0.7 |
|||
----------- |
|||
- support for input and output charset encoding |
|||
based on the work in FoF, uses iconv or mbstring if available |
|||
- |
|||
|
|||
Version 0.6 |
|||
----------- |
|||
- basic support for Atom syndication format |
|||
including support for Atom content constructs |
|||
- fixed support for private feeds (HTTP Auth and SSL) |
|||
(thanks to silverorange.com for providing test feeds) |
|||
- support for some broken webservers |
|||
|
|||
Version 0.52 |
|||
----------- |
|||
- support GZIP content negotiation |
|||
- PHP 4.3.2 support |
|||
|
|||
Version 0.4 |
|||
----------- |
|||
- improved error handling, better access for script authors |
|||
- included example scripts of working with MagpieRSS |
|||
- new Smarty plugin for RSS date parsing |
|||
|
|||
Version 0.3 |
|||
----------- |
|||
- added support for conditional gets (Last-Modified, ETag) |
|||
- now use Snoopy to handle fetching RSS files |
|||
|
|||
Version 0.2 |
|||
----------- |
|||
- MAJOR CLEAN UP |
|||
- removed kludgy $options array in favour of constants |
|||
- phased out returning arrays |
|||
- added better error handling |
|||
- re-worked comments |
@ -1,405 +0,0 @@ |
|||
2005-10-28 14:11 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: a better solution |
|||
|
|||
2005-10-28 11:51 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: fix arbtriary code execution |
|||
vulnerability when using curl+ssl |
|||
|
|||
http://www.sec-consult.com/216.html |
|||
|
|||
2005-03-08 10:46 kellan |
|||
|
|||
* rss_parse.inc: fix bug w/ atom and date normalization |
|||
|
|||
2005-02-09 14:59 kellan |
|||
|
|||
* rss_fetch.inc: fix stale cache bug |
|||
|
|||
2005-01-28 02:27 kellan |
|||
|
|||
* rss_parse.inc: support php w/o array_change_case |
|||
|
|||
2005-01-23 20:02 kellan |
|||
|
|||
* rss_fetch.inc: fix cache bug introduced by charset encoding |
|||
|
|||
2005-01-12 09:14 kellan |
|||
|
|||
* rss_cache.inc, rss_fetch.inc: more sanity checks for when things |
|||
go wrong |
|||
|
|||
2004-12-12 13:44 kellan |
|||
|
|||
* INSTALL, rss_cache.inc, rss_utils.inc: detab |
|||
|
|||
2004-11-23 20:15 kellan |
|||
|
|||
* rss_parse.inc: fix calling iconv instead of mb_convert_encoding |
|||
|
|||
2004-11-22 02:11 kellan |
|||
|
|||
* CHANGES, ChangeLog, rss_parse.inc, scripts/magpie_debug.php: last |
|||
bit of tidying |
|||
|
|||
2004-11-22 01:45 kellan |
|||
|
|||
* rss_fetch.inc: detab, bump version |
|||
|
|||
2004-11-22 01:43 kellan |
|||
|
|||
* rss_parse.inc: was filtering too much |
|||
|
|||
2004-11-22 00:03 kellan |
|||
|
|||
* rss_fetch.inc, rss_parse.inc: cache on $url . $output_encoding |
|||
otherwise we can get munged output |
|||
|
|||
2004-11-21 23:52 kellan |
|||
|
|||
* rss_parse.inc: add WARNING |
|||
|
|||
2004-11-21 23:45 kellan |
|||
|
|||
* rss_parse.inc: don't set ERROR on notice or warning (rss_fetch |
|||
dies on parse errors) |
|||
|
|||
2004-11-21 23:44 kellan |
|||
|
|||
* rss_fetch.inc: add encoding defines (fix timeout error reporting) |
|||
|
|||
2004-11-21 20:21 kellan |
|||
|
|||
* rss_parse.inc: incorporate steve's patch |
|||
|
|||
2004-11-21 19:26 kellan |
|||
|
|||
* rss_parse.inc: remove old debugging functions, totally |
|||
arbitrarily. might break stuff. can't really explain why i'm |
|||
doing this. |
|||
|
|||
2004-10-28 15:52 kellan |
|||
|
|||
* rss_parse.inc: fixed '=' instead of '==' |
|||
|
|||
2004-10-26 00:48 kellan |
|||
|
|||
* rss_parse.inc: chance epoch to timestamp to conform w/ php naming |
|||
conventions |
|||
|
|||
2004-06-15 12:00 kellan |
|||
|
|||
* rss_parse.inc: [no log message] |
|||
|
|||
2004-04-26 14:16 kellan |
|||
|
|||
* rss_fetch.inc: bump version |
|||
|
|||
2004-04-26 12:36 kellan |
|||
|
|||
* rss_parse.inc: fix field doubling |
|||
|
|||
2004-04-24 17:47 kellan |
|||
|
|||
* CHANGES, ChangeLog: updated |
|||
|
|||
2004-04-24 17:35 kellan |
|||
|
|||
* rss_fetch.inc: bumped version |
|||
|
|||
2004-04-24 16:52 kellan |
|||
|
|||
* rss_parse.inc: support arbitrary atom content constructs |
|||
|
|||
some refactoring |
|||
|
|||
2004-04-24 16:15 kellan |
|||
|
|||
* rss_parse.inc: support summary content contstruct. add normalize |
|||
function |
|||
|
|||
2004-03-27 16:29 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: accept self-signed certs |
|||
|
|||
2004-03-27 12:53 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: fixed SSL support * set status * set |
|||
error on bad curl |
|||
|
|||
(also ripped out big chunks of dead weight (submit_form) which |
|||
were getting in my way |
|||
|
|||
2004-01-25 02:25 kellan |
|||
|
|||
* rss_parse.inc: make RSS 1.0's rdf:about available |
|||
|
|||
2004-01-25 02:07 kellan |
|||
|
|||
* rss_parse.inc: clean up text, and line formats. add support item |
|||
rdf:about |
|||
|
|||
2004-01-24 23:40 kellan |
|||
|
|||
* CHANGES, ChangeLog: update changes |
|||
|
|||
2004-01-24 23:37 kellan |
|||
|
|||
* rss_fetch.inc: updated version |
|||
|
|||
2004-01-24 23:35 kellan |
|||
|
|||
* rss_parse.inc: whitespace |
|||
|
|||
2004-01-24 23:23 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: support badly formatted http headers |
|||
|
|||
2004-01-24 23:20 kellan |
|||
|
|||
* rss_parse.inc: added alpha atom parsing support |
|||
|
|||
2003-06-25 22:34 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: fixed fread 4.3.2 compatibility problems |
|||
|
|||
2003-06-13 11:31 kellan |
|||
|
|||
* rss_fetch.inc: reset cache on 304 |
|||
|
|||
2003-06-12 21:37 kellan |
|||
|
|||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
|||
bumped up version numbers |
|||
|
|||
2003-06-12 21:32 kellan |
|||
|
|||
* htdocs/index.html: updated news |
|||
|
|||
2003-06-12 21:27 kellan |
|||
|
|||
* NEWS: a manual blog :) |
|||
|
|||
2003-06-12 21:22 kellan |
|||
|
|||
* htdocs/index.html: fully qualified img |
|||
|
|||
2003-06-12 21:20 kellan |
|||
|
|||
* htdocs/index.html: clean up. added badge. |
|||
|
|||
2003-06-12 21:04 kellan |
|||
|
|||
* rss_utils.inc: clean up regex |
|||
|
|||
2003-06-12 21:02 kellan |
|||
|
|||
* rss_cache.inc: suppress some warnings |
|||
|
|||
2003-05-30 20:44 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: more comments, cleaned up notice |
|||
|
|||
2003-05-30 15:14 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: don't advertise gzip support if the user |
|||
hasn't built php with gzinflate support |
|||
|
|||
2003-05-12 22:32 kellan |
|||
|
|||
* ChangeLog: changes |
|||
|
|||
2003-05-12 22:11 kellan |
|||
|
|||
* htdocs/index.html: announce 0.5 |
|||
|
|||
2003-05-12 21:42 kellan |
|||
|
|||
* htdocs/index.html: change |
|||
|
|||
2003-05-12 21:39 kellan |
|||
|
|||
* rss_fetch.inc: use gzip |
|||
|
|||
2003-05-12 21:37 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: added support gzip encoded content |
|||
negoiation |
|||
|
|||
2003-05-12 21:32 kellan |
|||
|
|||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: fixed |
|||
typoes |
|||
|
|||
2003-04-26 21:44 kellan |
|||
|
|||
* rss_parse.inc: fix minor typo |
|||
|
|||
2003-04-18 08:19 kellan |
|||
|
|||
* htdocs/cookbook.html: updated cookbook to show more code for |
|||
limiting items |
|||
|
|||
2003-03-03 16:02 kellan |
|||
|
|||
* rss_parse.inc, scripts/magpie_slashbox.php: committed (or |
|||
adpated) patch from Nicola (www.technick.com) to quell 'Undefined |
|||
Indexes' notices |
|||
|
|||
2003-03-03 15:59 kellan |
|||
|
|||
* rss_fetch.inc: commited patch from nicola (www.technick.com) to |
|||
quell 'undefined indexes' notices. |
|||
|
|||
* Magpie now automatically includes its version in the |
|||
user-agent, & whether cacheing is turned on. |
|||
|
|||
2003-02-12 01:22 kellan |
|||
|
|||
* CHANGES, ChangeLog: ChangeLog now auto-generated by cvs2cl |
|||
|
|||
2003-02-12 00:21 kellan |
|||
|
|||
* rss_fetch.inc: better errors, hopefully stomped on pesky notices |
|||
|
|||
2003-02-12 00:19 kellan |
|||
|
|||
* rss_parse.inc: check to see is xml is supported, if not die |
|||
|
|||
also throw better xml errors |
|||
|
|||
2003-02-12 00:18 kellan |
|||
|
|||
* rss_cache.inc: hopefully cleared up some notices that were being |
|||
thrown into the log |
|||
|
|||
fixed a debug statement that was being called as an error |
|||
|
|||
2003-02-12 00:15 kellan |
|||
|
|||
* scripts/: magpie_simple.php, magpie_slashbox.php: moved |
|||
magpie_simple to magpie_slashbox, and replaced it with a simpler |
|||
demo. |
|||
|
|||
2003-02-12 00:02 kellan |
|||
|
|||
* INSTALL, README, TROUBLESHOOTING: Improved documentation. Better |
|||
install instructions. |
|||
|
|||
TROUBLESHOOTING cover common installation and usage problems |
|||
|
|||
2003-01-22 14:40 kellan |
|||
|
|||
* htdocs/cookbook.html: added cookbook.html |
|||
|
|||
2003-01-21 23:47 kellan |
|||
|
|||
* cookbook: a magpie cookbook |
|||
|
|||
2003-01-20 10:09 kellan |
|||
|
|||
* ChangeLog: updated |
|||
|
|||
2003-01-20 09:23 kellan |
|||
|
|||
* scripts/simple_smarty.php: minor clean up |
|||
|
|||
2003-01-20 09:15 kellan |
|||
|
|||
* scripts/README: added smarty url |
|||
|
|||
2003-01-20 09:14 kellan |
|||
|
|||
* magpie_simple.php, htdocs/index.html, scripts/README, |
|||
scripts/magpie_debug.php, scripts/magpie_simple.php, |
|||
scripts/simple_smarty.php, |
|||
scripts/smarty_plugin/modifier.rss_date_parse.php, |
|||
scripts/templates/simple.smarty: Added scripts directory for |
|||
examples on how to use MagpieRSS |
|||
|
|||
magpie_simple - is a simple example magpie_debug - spew all the |
|||
information from a parsed RSS feed simple_smary - example of |
|||
using magpie with Smarty template system |
|||
smarty_plugin/modifier.rss_date_parse.php - support file for the |
|||
smarty demo templates/simple.smary - template for the smarty demo |
|||
|
|||
2003-01-20 09:11 kellan |
|||
|
|||
* rss_fetch.inc, rss_parse.inc: changes to error handling to give |
|||
script authors more access to magpie's errors. |
|||
|
|||
added method magpie_error() to retrieve global MAGPIE_ERROR |
|||
variable for when fetch_rss() returns false |
|||
|
|||
2002-10-26 19:02 kellan |
|||
|
|||
* htdocs/index.html: putting the website under source control |
|||
|
|||
2002-10-26 18:43 kellan |
|||
|
|||
* AUTHORS, ChangeLog, INSTALL, README: some documentation to make |
|||
it all look official :) |
|||
|
|||
2002-10-25 23:04 kellan |
|||
|
|||
* magpie_simple.php: quxx |
|||
|
|||
2002-10-25 23:04 kellan |
|||
|
|||
* rss_parse.inc: added support for textinput and image |
|||
|
|||
2002-10-25 19:23 kellan |
|||
|
|||
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc, |
|||
rss_utils.inc: switched to using Snoopy for fetching remote RSS |
|||
files. |
|||
|
|||
added support for conditional gets |
|||
|
|||
2002-10-25 19:22 kellan |
|||
|
|||
* rss_cache.inc, rss_fetch.inc, rss_parse.inc, rss_utils.inc: |
|||
Change comment style to slavishly imitate the phpinsider style |
|||
found in Smarty and Snoopy :) |
|||
|
|||
2002-10-25 19:18 kellan |
|||
|
|||
* extlib/Snoopy.class.inc: added Snoopy in order to support |
|||
conditional gets |
|||
|
|||
2002-10-23 23:19 kellan |
|||
|
|||
* magpie_simple.php, rss_cache.inc, rss_fetch.inc, rss_parse.inc: |
|||
MAJOR CLEANUP! |
|||
|
|||
* rss_fetch got rid of the options array, replaced it with a more |
|||
PHP-like solution of using defines. constants are setup, with |
|||
defaults, in the function init() |
|||
|
|||
got rid of the idiom of passing back an array, its was awkward to |
|||
deal with in PHP, and unusual (and consquently confusing to |
|||
people). now i return true/false values, and try to setup error |
|||
string where appropiate (rss_cache has the most complete example |
|||
of this) |
|||
|
|||
change the logic for interacting with the cache |
|||
|
|||
* rss_cache major re-working of how error are handled. tried to |
|||
make the code more resillient. the cache is now much more aware |
|||
of MAX_AGE, where before this was being driven out of rss_fetch |
|||
(which was silly) |
|||
|
|||
* rss_parse properly handles xml parse errors. used to sail |
|||
along blithely unaware. |
|||
|
|||
2002-09-11 11:11 kellan |
|||
|
|||
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
|||
rss_utils.inc: Initial revision |
|||
|
|||
2002-09-11 11:11 kellan |
|||
|
|||
* rss_cache.inc, rss_parse.inc, magpie_simple.php, rss_fetch.inc, |
|||
rss_utils.inc: initial import |
|||
|
@ -1,143 +0,0 @@ |
|||
REQUIREMENTS |
|||
|
|||
MagpieRSS requires a recent PHP 4+ (developed with 4.2.0) |
|||
with xml (expat) support. |
|||
|
|||
Optionally: |
|||
* PHP5 with libxml2 support. |
|||
* cURL for SSL support |
|||
* iconv (preferred) or mb_string for expanded character set support |
|||
|
|||
QUICK START |
|||
|
|||
Magpie consists of 4 files (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
|||
and rss_utils.inc), and the directory extlib (which contains a modified |
|||
version of the Snoopy HTTP client) |
|||
|
|||
Copy these 5 resources to a directory named 'magpierss' in the same |
|||
directory as your PHP script. |
|||
|
|||
At the top of your script add the following line: |
|||
|
|||
require_once('magpierss/rss_fetch.inc'); |
|||
|
|||
Now you can use the fetch_rss() method: |
|||
|
|||
$rss = fetch_rss($url); |
|||
|
|||
Done. That's it. See README for more details on using MagpieRSS. |
|||
|
|||
NEXT STEPS |
|||
|
|||
Important: you'll probably want to get the cache directory working in |
|||
order to speed up your application, and not abuse the webserver you're |
|||
downloading the RSS from. |
|||
|
|||
Optionally you can install MagpieRSS in your PHP include path in order to |
|||
make it available server wide. |
|||
|
|||
Lastly you might want to look through the constants in rss_fetch.inc see if |
|||
there is anything you want to override (the defaults are pretty good) |
|||
|
|||
For more info, or if you have trouble, see TROUBLESHOOTING |
|||
|
|||
SETTING UP CACHING |
|||
|
|||
Magpie has built-in transparent caching. With caching Magpie will only |
|||
fetch and parse RSS feeds when there is new content. Without this feature |
|||
your pages will be slow, and the sites serving the RSS feed will be annoyed |
|||
with you. |
|||
|
|||
** Simple and Automatic ** |
|||
|
|||
By default Magpie will try to create a cache directory named 'cache' in the |
|||
same directory as your PHP script. |
|||
|
|||
** Creating a Local Cache Directory ** |
|||
|
|||
Often this will fail, because your webserver doesn't have sufficient |
|||
permissions to create the directory. |
|||
|
|||
Exact instructions for how to do this will vary from install to install and |
|||
platform to platform. The steps are: |
|||
|
|||
1. Make a directory named 'cache' |
|||
2. Give the web server write access to that directory. |
|||
|
|||
An example of how to do this on Debian would be: |
|||
|
|||
1. mkdir /path/to/script/cache |
|||
2. chgrp www-data /path/to/script/cache |
|||
3. chmod 775 /path/to/script/cache |
|||
|
|||
On other Unixes you'll need to change 'www-data' to what ever user Apache |
|||
runs as. (on MacOS X the user would be 'www') |
|||
|
|||
** Cache in /tmp ** |
|||
|
|||
Sometimes you won't be able to create a local cache directory. Some reasons |
|||
might be: |
|||
|
|||
1. No shell account |
|||
2. Insufficient permissions to change ownership of a directory |
|||
3. Webserver runs as 'nobody' |
|||
|
|||
In these situations using a cache directory in /tmp can often be a good |
|||
option. |
|||
|
|||
The drawback is /tmp is public, so anyone on the box can read the cache |
|||
files. Usually RSS feeds are public information, so you'll have to decide |
|||
how much of an issue that is. |
|||
|
|||
To use /tmp as your cache directory you need to add the following line to |
|||
your script: |
|||
|
|||
define('MAGPIE_CACHE_DIR', '/tmp/magpie_cache'); |
|||
|
|||
** Global Cache ** |
|||
|
|||
If you have several applications using Magpie, you can create a single |
|||
shared cache directory, either using the /tmp cache, or somewhere else on |
|||
the system. |
|||
|
|||
The upside is that you'll distribute fetching and parsing feeds across |
|||
several applications. |
|||
|
|||
INSTALLING MAGPIE SERVER WIDE |
|||
|
|||
Rather than following the Quickstart instructions, which require you to have |
|||
a copy of Magpie per application, alternately you can place it in some |
|||
shared location. |
|||
|
|||
** Adding Magpie to Your Include Path ** |
|||
|
|||
Copy the 5 resources (rss_fetch.inc, rss_parse.inc, rss_cache.inc, |
|||
rss_utils.inc, and extlib) to a directory named 'magpierss' in your include |
|||
path. Now any PHP file on your system can use Magpie with: |
|||
|
|||
require_once('magpierss/rss_fetch.inc'); |
|||
|
|||
Different installs have different include paths, and you'll have to figure |
|||
out what your include_path is. |
|||
|
|||
From shell you can try: |
|||
|
|||
php -i | grep 'include_path' |
|||
|
|||
Alternatively you can create a phpinfo.php file which contains: |
|||
|
|||
<?php phpinfo(); ?> |
|||
|
|||
Debian's default is: |
|||
|
|||
/usr/share/php |
|||
|
|||
(though a more ideologically pure location would be /usr/local/share/php) |
|||
|
|||
Apple's default include path is: |
|||
|
|||
/usr/lib/php |
|||
|
|||
While the Entropy PHP build seems to use: |
|||
|
|||
/usr/local/php/lib/php |
@ -1,53 +0,0 @@ |
|||
MagpieRSS News |
|||
|
|||
MAGPIERSS 0.51 RELEASED |
|||
* important bugfix! |
|||
* fix "silent failure" when PHP doesn't have zlib |
|||
|
|||
FEED ON FEEDS USES MAGPIE |
|||
* web-based RSS aggregator built with Magpie |
|||
* easy to install, easy to use. |
|||
http://minutillo.com/steve/feedonfeeds/ |
|||
|
|||
MAGPIERSS 0.5 RELEASED |
|||
* supports transparent HTTP gzip content negotiation for reduced bandwidth usage |
|||
* quashed some undefined index notices |
|||
|
|||
MAGPIERSS 0.46 RELEASED |
|||
* minor release, more error handling clean up |
|||
* documentation fixes, simpler example |
|||
* new trouble shooting guide for installation and usage problems |
|||
http://magpierss.sourceforge.net/TROUBLESHOOTING |
|||
|
|||
MAGPIE NEWS AS RSS |
|||
* releases, bug fixes, related stories in RSS |
|||
|
|||
MAGPIERSS COOKBOOK: SIMPLE PHP RSS HOW TOS |
|||
* answers some of the most frequently asked Magpie questions |
|||
* feedback, suggestions, requests, recipes welcome |
|||
http://magpierss.sourceforge.net/cookbook.html |
|||
|
|||
MAGPIERSS 0.4 RELEASED! |
|||
* improved error handling, more flexibility for script authors, backwards compatible |
|||
* new and better examples! including using MagpieRSS and Smarty |
|||
* new Smarty plugin for RSS date parsing |
|||
http://smarty.php.net |
|||
|
|||
INFINITE PENGUIN NOW SUPPORTS MAGPIE 0.3 |
|||
* simple, sophisticated RSS viewer |
|||
* includes auto-generated javascript ticker from RSS feed |
|||
http://www.infinitepenguins.net/rss/ |
|||
|
|||
TRAUMWIND RELEASES REX BACKEND FOR MAGPIERSS |
|||
* drop in support using regex based XML parser |
|||
* parses improperly formed XML that chokes expat |
|||
http://traumwind.de/blog/magpie/magpie_alike.php |
|||
|
|||
MAGPIERSS 0.3 RELEASED! |
|||
* Support added for HTTP Conditional GETs. |
|||
http://fishbowl.pastiche.org/archives/001132.html |
|||
|
|||
MAGPIERSS 0.2! |
|||
* Major clean up of the code. Easier to use. |
|||
* Simpler install on shared hosts. |
|||
* Better documentation and comments. |
@ -1,48 +0,0 @@ |
|||
NAME |
|||
|
|||
MagpieRSS - a simple RSS integration tool |
|||
|
|||
SYNOPSIS |
|||
|
|||
require_once('rss_fetch.inc'); |
|||
$url = $_GET['url']; |
|||
$rss = fetch_rss( $url ); |
|||
|
|||
echo "Channel Title: " . $rss->channel['title'] . "<p>"; |
|||
echo "<ul>"; |
|||
foreach ($rss->items as $item) { |
|||
$href = $item['link']; |
|||
$title = $item['title']; |
|||
echo "<li><a href=$href>$title</a></li>"; |
|||
} |
|||
echo "</ul>"; |
|||
|
|||
DESCRIPTION |
|||
|
|||
MagpieRSS is an XML-based RSS parser in PHP. It attempts to be "PHP-like", |
|||
and simple to use. |
|||
|
|||
Some features include: |
|||
|
|||
* supports RSS 0.9 - 1.0, with limited RSS 2.0 support |
|||
* supports namespaces, and modules, including mod_content and mod_event |
|||
* open minded [1] |
|||
* simple, functional interface, to object oriented backend parser |
|||
* automatic caching of parsed RSS objects makes it easy to integrate |
|||
* supports conditional GET with Last-Modified, and ETag |
|||
* uses constants for easy override of default behaviour |
|||
* heavily commented |
|||
|
|||
|
|||
1. By open minded I mean Magpie will accept any tag it finds in good faith that |
|||
it was supposed to be here. For strict validation, look elsewhere. |
|||
|
|||
|
|||
GETTING STARTED |
|||
|
|||
|
|||
|
|||
COPYRIGHT: |
|||
Copyright(c) 2002 kellan@protest.net. All rights reserved. |
|||
This software is released under the GNU General Public License. |
|||
Please read the disclaimer at the top of the Snoopy.class.inc file. |
@ -1,152 +0,0 @@ |
|||
TROUBLESHOOTING |
|||
|
|||
|
|||
Trouble Installing MagpieRSS: |
|||
|
|||
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
|||
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
|||
|
|||
2. Cache couldn't make dir './cache'. |
|||
|
|||
3. Fatal error: Failed to load PHP's XML Extension. |
|||
http://www.php.net/manual/en/ref.xml.php |
|||
|
|||
Trouble Using MagpieRSS |
|||
|
|||
4. Warning: MagpieRSS: Failed to fetch example.com/index.rdf. |
|||
(HTTP Error: Invalid protocol "") |
|||
|
|||
5. Warning: MagpieRSS: Failed to parse RSS file. |
|||
(not well-formed (invalid token) at line 19, column 98) |
|||
|
|||
6. Warning: MagpieRSS: Failed to fetch http://localhost/rss/features.1-0.rss. |
|||
(HTTP Response: HTTP/1.1 404 Not Found) |
|||
|
|||
If you would rather provide a custom error, see the COOKBOOK |
|||
(http://magpierss.sf.net/cookbook.html) recipe 2. |
|||
|
|||
************************************************************************* |
|||
1. Fatal error: Failed opening required '/path/to/script/rss_fetch.inc' |
|||
(include_path='.:/usr/local/lib/php:/usr/local/lib/php/pear') |
|||
|
|||
This could mean that: |
|||
|
|||
a) PHP can't find the MagpieRSS files. |
|||
b) PHP found the MagpieRSS files, but can't read them. |
|||
|
|||
a. Telling PHP where to look for MagpieRSS file. |
|||
|
|||
This might mean your PHP program can't find the MagpieRSS libraries. |
|||
Magpie relies on 4 include files, rss_fetch.inc, rss_parse.inc, |
|||
rss_cache.inc, rss_utils.inc, and for normal use you'll need all 4 (see the |
|||
cookbook for exceptions). |
|||
|
|||
This can be fixed by making sure the MagpieRSS files are in your include |
|||
path. |
|||
|
|||
If you can't edit your include path (for example you're on a shared host) then |
|||
you need to replace: |
|||
|
|||
require_once('rss_fetch.inc'); |
|||
|
|||
-with- |
|||
|
|||
define('MAGPIE_DIR', '/path/to/magpierss/'); |
|||
require_once(MAGPIE_DIR.'rss_fetch.inc'); |
|||
|
|||
b. PHP can't read the MagpieRSS files |
|||
|
|||
All PHP libraries need to be readable by your webserver. |
|||
|
|||
On Unix you can accomplish this with: |
|||
|
|||
chmod 755 rss_fetch.inc rss_parse.inc rss_cache.inc rss_utils.inc |
|||
|
|||
************************************************************************* |
|||
2. Cache couldn't make dir './cache'. |
|||
|
|||
MagpieRSS caches the results of fetched and parsed RSS to reduce the load on |
|||
both your server, and the remote server providing the RSS. It does this by |
|||
writing files to a cache directory. |
|||
|
|||
This error means the webserver doesn't have write access to the current |
|||
directory. |
|||
|
|||
a. Make a webserver writeable cache directory |
|||
|
|||
Find the webserver's group. (on my system it is 'www') |
|||
|
|||
mkdir ./cache |
|||
chgrp www directory_name |
|||
chmod g+w directory_name |
|||
|
|||
(this is the best, and desired solution) |
|||
|
|||
b. Tell MagpieRSS to create the cache directory somewhere the webserver can |
|||
write to. |
|||
|
|||
define('MAGPIE_CACHE_DIR', '/tmp/magpierss'); |
|||
|
|||
(this is not a great solution, and might have security considerations) |
|||
|
|||
c. Turn off cacheing. |
|||
|
|||
Magpie can work fine without caching, but it will be slower, and you might |
|||
become a nuisance to the RSS provider, but it is an option. |
|||
|
|||
define('MAGPIE_CACHE_ON', 0); |
|||
|
|||
d. And lastly, do NOT |
|||
|
|||
chmod 777 ./cache |
|||
|
|||
Any of the above solutions are better than this. |
|||
|
|||
NOTE: If none of this works for you, let me know. I've got root, and a |
|||
custom compiled Apache on almost any box I ever touch, so I can be a little |
|||
out of touch with reality. But I won't know that if I don't get feedback. |
|||
|
|||
************************************************************************* |
|||
3. Fatal error: Failed to load PHP's XML Extension. |
|||
http://www.php.net/manual/en/ref.xml.php |
|||
|
|||
-or- |
|||
|
|||
Fatal error: Failed to create an instance of PHP's XML parser. |
|||
http://www.php.net/manual/en/ref.xml.php |
|||
|
|||
Make sure your PHP was built with --with-xml |
|||
|
|||
This has been turned on by default for several versions of PHP, but it might |
|||
be turned off in your build. |
|||
|
|||
See php.net for details on building and configuring PHP. |
|||
|
|||
|
|||
************************************************************************* |
|||
4. Warning: MagpieRSS: Failed to fetch index.rdf. |
|||
(HTTP Error: Invalid protocol "") |
|||
|
|||
You need to put http:// in front of the URL to your RSS feed |
|||
|
|||
************************************************************************* |
|||
5. Warning: MagpieRSS: Failed to parse RSS file. |
|||
(not well-formed (invalid token) at line 19, column 98) |
|||
|
|||
There is a problem with the RSS feed you are trying to read. |
|||
MagpieRSS is an XML parser, and therefore can't parse RSS feed with invalid |
|||
characters. Some RSS parsers are based on regular expressions, and can |
|||
parse invalid RSS but they have their own problems. |
|||
|
|||
You could try contacting the author of the RSS feed, and pointing them to |
|||
the online RSS validator at: |
|||
|
|||
http://feeds.archive.org/validator/ |
|||
|
|||
************************************************************************* |
|||
6. Warning: MagpieRSS: Failed to fetch http://example.com/index.rdf |
|||
(HTTP Response: HTTP/1.1 404 Not Found) |
|||
|
|||
It's a 404! The RSS file ain't there. |
|||
|
|||
|
@ -1,125 +0,0 @@ |
|||
MAGPIERSS RECIPES: Cooking with Corbies |
|||
|
|||
"Four and twenty blackbirds baked in a pie." |
|||
|
|||
1. LIMIT THE NUMBER OF HEADLINES(AKA ITEMS) RETURNED. |
|||
|
|||
PROBLEM: |
|||
|
|||
You want to display the 10 (or 3) most recent headlines, but the RSS feed |
|||
contains 15. |
|||
|
|||
SOLUTION: |
|||
|
|||
$num_items = 10; |
|||
$rss = fetch_rss($url); |
|||
|
|||
$items = array_slice($rss->items, 0, $num_items); |
|||
|
|||
DISCUSSION: |
|||
|
|||
Rather than trying to limit the number of items Magpie parses, a much simpler, |
|||
and more flexible approach is to take a "slice" of the array of items. And |
|||
array_slice() is smart enough to do the right thing if the feed has fewer items |
|||
than $num_items. |
|||
|
|||
See: http://www.php.net/array_slice |
|||
|
|||
|
|||
2. DISPLAY A CUSTOM ERROR MESSAGE IF SOMETHING GOES WRONG |
|||
|
|||
PROBLEM: |
|||
|
|||
You don't want Magpie's error messages showing up if something goes wrong. |
|||
|
|||
SOLUTION: |
|||
|
|||
# Magpie throws USER_WARNINGS only |
|||
# so you can cloak these, by only showing ERRORs |
|||
error_reporting(E_ERROR); |
|||
|
|||
# check the return value of fetch_rss() |
|||
|
|||
$rss = fetch_rss($url); |
|||
|
|||
if ( $rss ) { |
|||
...display rss feed... |
|||
} |
|||
else { |
|||
echo "An error occured! " . |
|||
"Consider donating more $$$ for restoration of services." . |
|||
"<br>Error Message: " . magpie_error(); |
|||
} |
|||
|
|||
DISCUSSION: |
|||
|
|||
MagpieRSS triggers a warning in a number of circumstances. The 2 most common |
|||
circumstances are: if the specified RSS file isn't properly formed (usually |
|||
because it includes illegal HTML), or if Magpie can't download the remote RSS |
|||
file, and there is no cached version. |
|||
|
|||
If you don't want your users to see these warnings change your error_reporting |
|||
settings to only display ERRORs. Another option is to turn off display_errors, |
|||
so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. |
|||
|
|||
You can do this with: |
|||
|
|||
ini_set('display_errors', 0); |
|||
|
|||
See: http://www.php.net/error_reporting, |
|||
http://www.php.net/ini_set, |
|||
http://www.php.net/manual/en/ref.errorfunc.php |
|||
|
|||
3. GENERATE A NEW RSS FEED |
|||
|
|||
PROBLEM: |
|||
|
|||
Create an RSS feed for other people to use. |
|||
|
|||
SOLUTION: |
|||
|
|||
Use Useful Inc's RSSWriter (http://usefulinc.com/rss/rsswriter/) |
|||
|
|||
DISCUSSION: |
|||
|
|||
An example of turning a Magpie parsed RSS object back into an RSS file is |
|||
forthcoming. In the meantime RSSWriter has great documentation. |
|||
|
|||
4. DISPLAY HEADLINES MORE RECENT THAN X DATE |
|||
|
|||
PROBLEM: |
|||
|
|||
You only want to display headlines that were published on, or after a certain |
|||
date. |
|||
|
|||
|
|||
SOLUTION: |
|||
|
|||
require 'rss_utils.inc'; |
|||
|
|||
# get all headlines published today |
|||
$today = getdate(); |
|||
|
|||
# today, 12AM |
|||
$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); |
|||
|
|||
$rss = fetch_rss($url); |
|||
|
|||
foreach ( $rss->items as $item ) { |
|||
$published = parse_w3cdtf($item['dc']['date']); |
|||
if ( $published >= $date ) { |
|||
echo "Title: " . $item['title']; |
|||
echo "Published: " . date("h:i:s A", $published); |
|||
echo "<p>"; |
|||
} |
|||
} |
|||
|
|||
DISCUSSION: |
|||
|
|||
This recipe only works for RSS 1.0 feeds that include the <dc:date> field. |
|||
(which is very good RSS style) |
|||
|
|||
parse_w3cdtf is defined in rss_utils.inc, and parses RSS style dates into Unix |
|||
epoch seconds. |
|||
|
|||
See: http://www.php.net/manual/en/ref.datetime.php |
@ -1,900 +0,0 @@ |
|||
<?php |
|||
|
|||
/************************************************* |
|||
|
|||
Snoopy - the PHP net client |
|||
Author: Monte Ohrt <monte@ispi.net> |
|||
Copyright (c): 1999-2000 ispi, all rights reserved |
|||
Version: 1.0 |
|||
|
|||
* This library is free software; you can redistribute it and/or |
|||
* modify it under the terms of the GNU Lesser General Public |
|||
* License as published by the Free Software Foundation; either |
|||
* version 2.1 of the License, or (at your option) any later version. |
|||
* |
|||
* This library is distributed in the hope that it will be useful, |
|||
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
* Lesser General Public License for more details. |
|||
* |
|||
* You should have received a copy of the GNU Lesser General Public |
|||
* License along with this library; if not, write to the Free Software |
|||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|||
|
|||
You may contact the author of Snoopy by e-mail at: |
|||
monte@ispi.net |
|||
|
|||
Or, write to: |
|||
Monte Ohrt |
|||
CTO, ispi |
|||
237 S. 70th suite 220 |
|||
Lincoln, NE 68510 |
|||
|
|||
The latest version of Snoopy can be obtained from: |
|||
http://snoopy.sourceforge.com |
|||
|
|||
*************************************************/ |
|||
|
|||
class Snoopy |
|||
{ |
|||
/**** Public variables ****/ |
|||
|
|||
/* user definable vars */ |
|||
|
|||
var $host = "www.php.net"; // host name we are connecting to |
|||
var $port = 80; // port we are connecting to |
|||
var $proxy_host = ""; // proxy host to use |
|||
var $proxy_port = ""; // proxy port to use |
|||
var $agent = "Snoopy v1.0"; // agent we masquerade as |
|||
var $referer = ""; // referer info to pass |
|||
var $cookies = array(); // array of cookies to pass |
|||
// $cookies["username"]="joe"; |
|||
var $rawheaders = array(); // array of raw headers to send |
|||
// $rawheaders["Content-type"]="text/html"; |
|||
|
|||
var $maxredirs = 5; // http redirection depth maximum. 0 = disallow |
|||
var $lastredirectaddr = ""; // contains address of last redirected address |
|||
var $offsiteok = true; // allows redirection off-site |
|||
var $maxframes = 0; // frame content depth maximum. 0 = disallow |
|||
var $expandlinks = true; // expand links to fully qualified URLs. |
|||
// this only applies to fetchlinks() |
|||
// or submitlinks() |
|||
var $passcookies = true; // pass set cookies back through redirects |
|||
// NOTE: this currently does not respect |
|||
// dates, domains or paths. |
|||
|
|||
var $user = ""; // user for http authentication |
|||
var $pass = ""; // password for http authentication |
|||
|
|||
// http accept types |
|||
var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*"; |
|||
|
|||
var $results = ""; // where the content is put |
|||
|
|||
var $error = ""; // error messages sent here |
|||
var $response_code = ""; // response code returned from server |
|||
var $headers = array(); // headers returned from server sent here |
|||
var $maxlength = 500000; // max return data length (body) |
|||
var $read_timeout = 0; // timeout on read operations, in seconds |
|||
// supported only since PHP 4 Beta 4 |
|||
// set to 0 to disallow timeouts |
|||
var $timed_out = false; // if a read operation timed out |
|||
var $status = 0; // http request status |
|||
|
|||
var $curl_path = "/usr/bin/curl"; |
|||
// Snoopy will use cURL for fetching |
|||
// SSL content if a full system path to |
|||
// the cURL binary is supplied here. |
|||
// set to false if you do not have |
|||
// cURL installed. See http://curl.haxx.se |
|||
// for details on installing cURL. |
|||
// Snoopy does *not* use the cURL |
|||
// library functions built into php, |
|||
// as these functions are not stable |
|||
// as of this Snoopy release. |
|||
|
|||
// send Accept-encoding: gzip? |
|||
var $use_gzip = true; |
|||
|
|||
/**** Private variables ****/ |
|||
|
|||
var $_maxlinelen = 4096; // max line length (headers) |
|||
|
|||
var $_httpmethod = "GET"; // default http request method |
|||
var $_httpversion = "HTTP/1.0"; // default http request version |
|||
var $_submit_method = "POST"; // default submit method |
|||
var $_submit_type = "application/x-www-form-urlencoded"; // default submit type |
|||
var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type |
|||
var $_redirectaddr = false; // will be set if page fetched is a redirect |
|||
var $_redirectdepth = 0; // increments on an http redirect |
|||
var $_frameurls = array(); // frame src urls |
|||
var $_framedepth = 0; // increments on frame depth |
|||
|
|||
var $_isproxy = false; // set if using a proxy server |
|||
var $_fp_timeout = 30; // timeout for socket connection |
|||
|
|||
/*======================================================================*\ |
|||
Function: fetch |
|||
Purpose: fetch the contents of a web page |
|||
(and possibly other protocols in the |
|||
future like ftp, nntp, gopher, etc.) |
|||
Input: $URI the location of the page to fetch |
|||
Output: $this->results the output text from the fetch |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Fetch a document over http (raw socket) or https (external curl binary).
 *
 * User and password embedded in the URI are copied to $this->user / $this->pass.
 * After the request, a pending redirect is followed (subject to maxredirs and
 * offsiteok) and frame sources are fetched (subject to maxframes).
 *
 * @param string $URI absolute URI of the document to fetch
 * @return bool false when the scheme is unsupported, curl is unusable, or the
 *              socket connection fails (details in $this->error); true otherwise.
 *              The fetched content is left in $this->results.
 */
function fetch($URI)
{
    $URI_PARTS = parse_url($URI);
    if (!empty($URI_PARTS["user"]))
        $this->user = $URI_PARTS["user"];
    if (!empty($URI_PARTS["pass"]))
        $this->pass = $URI_PARTS["pass"];

    // parse_url() leaves out keys for components that are absent from the URI,
    // so every optional component is read through isset().
    $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
    $path = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
    if (isset($URI_PARTS["query"]))
        $path .= "?".$URI_PARTS["query"];

    switch ($scheme)
    {
        case "http":
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            if (!$this->_connect($fp))
                return false;

            // through a proxy the request line carries the entire URI,
            // otherwise only the path (plus query string)
            $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
            $this->_disconnect($fp);

            $this->_follow_redirect();
            $this->_fetch_frames();
            return true;

        case "https":
            if (!$this->curl_path || !is_executable($this->curl_path)) {
                $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                return false;
            }
            $this->host = $URI_PARTS["host"];
            if (!empty($URI_PARTS["port"]))
                $this->port = $URI_PARTS["port"];

            $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);

            $this->_follow_redirect();
            $this->_fetch_frames();
            return true;

        default:
            // not a valid protocol (also reached when the URI has no scheme at all)
            $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
            return false;
    }
}

/**
 * Follow the redirect recorded in $this->_redirectaddr, if any.
 *
 * Nothing happens once maxredirs is reached, or when the target is not on
 * the current host and offsiteok is false.
 */
function _follow_redirect()
{
    if (!$this->_redirectaddr)
        return;

    /* url was redirected, check if we've hit the max depth */
    if ($this->maxredirs <= $this->_redirectdepth)
        return;

    // only follow redirect if it's on this site, or offsiteok is true
    if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
    {
        $this->_redirectdepth++;
        $this->lastredirectaddr = $this->_redirectaddr;
        $this->fetch($this->_redirectaddr);
    }
}

/**
 * Fetch the frame URLs collected by the last request, up to maxframes.
 */
function _fetch_frames()
{
    if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
        return;

    // work on a copy: the nested fetch() calls refill $this->_frameurls
    $frameurls = $this->_frameurls;
    $this->_frameurls = array();

    foreach ($frameurls as $frameurl)
    {
        if ($this->_framedepth >= $this->maxframes)
            break;

        $this->fetch($frameurl);
        $this->_framedepth++;
    }
}
|||
|
|||
|
|||
|
|||
/*======================================================================*\ |
|||
Private functions |
|||
\*======================================================================*/ |
|||
|
|||
|
|||
/*======================================================================*\ |
|||
Function: _striplinks |
|||
Purpose: strip the hyperlinks from an html document |
|||
Input: $document document to strip. |
|||
Output: $match an array of the links |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Extract the href targets of all anchor tags in an HTML document.
 *
 * Quoted targets are listed first (in document order), followed by the
 * unquoted ones. Targets that PHP considers empty ("" or "0") are skipped.
 *
 * @param string $document HTML to scan
 * @return array list of link targets; empty array when none are found
 */
function _striplinks($document)
{
    // ".*?" must be lazy: with the "s" modifier a greedy ".*" runs on to the
    // LAST href in the document, so only one link would ever be reported.
    preg_match_all("'<\s*a\s+.*?href\s*=\s*			# find <a href=
                    ([\"\'])?					# find single or double quote
                    (?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
                                                # quote, otherwise match up to next space
                    'isx", $document, $links);

    // catenate the non-empty matches from the conditional subpattern
    $match = array();

    foreach ($links[2] as $val)
    {
        if (!empty($val))
            $match[] = $val;
    }

    foreach ($links[3] as $val)
    {
        if (!empty($val))
            $match[] = $val;
    }

    // return the links
    return $match;
}
|||
|
|||
/*======================================================================*\ |
|||
Function: _stripform |
|||
Purpose: strip the form elements from an html document |
|||
Input: $document document to strip. |
|||
Output: $match an array of the links |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Reduce an HTML document to its form-related tags.
 *
 * Keeps FORM, INPUT, SELECT, TEXTAREA and OPTION tags (opening and closing);
 * for OPTION tags the text following the tag is kept as well.
 *
 * @param string $document HTML to scan
 * @return string the matched elements joined with CRLF ("" when none match)
 */
function _stripform($document)
{
    $form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

    preg_match_all($form_pattern, $document, $elements);

    // $elements[0] holds the full text of every match
    return implode("\r\n", $elements[0]);
}
|||
|
|||
|
|||
|
|||
/*======================================================================*\ |
|||
Function: _striptext |
|||
Purpose: strip the text from an html document |
|||
Input: $document document to strip. |
|||
Output: $text the resulting text |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Strip markup from an HTML document and decode a handful of common entities.
 *
 * Removes script elements and tags, collapses whitespace that follows a line
 * break, and decodes the entities listed below. The characters for
 * iexcl/cent/pound/copy are emitted as single bytes via chr().
 *
 * @param string $document HTML to convert
 * @return string the resulting text
 */
function _striptext($document)
{
    // Entities are listed one by one; only some of the more common ones
    // are covered.
    $search = array("'<script[^>]*?>.*?</script>'si",	// strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",			// strip out html tags
                    "'([\r\n])[\s]+'",					// strip out white space
                    // the entity is spelled "quot"; the misspelled "quote"
                    // is still accepted so existing input keeps working
                    "'&(quot|quote|#34);'i",
                    "'&(lt|#60);'i",
                    "'&(gt|#62);'i",
                    "'&(nbsp|#160);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i",
                    // "&amp;" goes last: the patterns run one after another,
                    // and decoding it first would turn "&amp;lt;" into "<"
                    "'&(amp|#38);'i"
                    );
    $replace = array(	"",
                        "",
                        "\\1",
                        "\"",
                        "<",
                        ">",
                        " ",
                        chr(161),
                        chr(162),
                        chr(163),
                        chr(169),
                        "&");

    $text = preg_replace($search, $replace, $document);

    return $text;
}
|||
|
|||
/*======================================================================*\ |
|||
Function: _expandlinks |
|||
Purpose: expand each link into a fully qualified URL |
|||
Input: $links the links to qualify |
|||
$URI the full URI to get the base from |
|||
Output: $expandedLinks the expanded links |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Expand links into fully qualified URLs relative to a base URI.
 *
 * The base is $URI without its query string and without a trailing
 * "/name.ext" segment (a final segment containing exactly one dot).
 *
 * @param string|array $links link or links to qualify (passed to preg_replace)
 * @param string $URI the full URI to take the base from
 * @return string|array the expanded link(s)
 */
function _expandlinks($links,$URI)
{
    // everything up to (not including) the first "?"
    preg_match("/^[^\?]+/", $URI, $uri_head);

    // drop a trailing "/name.ext" segment to get the base location
    $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uri_head[0]);

    $patterns = array();
    $substitutions = array();

    // 1. remove an "http://<current host>" prefix
    $patterns[] = "|^http://".preg_quote($this->host)."|i";
    $substitutions[] = "";

    // 2. prefix the base to anything that is neither http:// nor mailto:
    $patterns[] = "|^(?!http://)(\/)?(?!mailto:)|i";
    $substitutions[] = $base."/";

    // 3. collapse "/./"
    $patterns[] = "|/\./|";
    $substitutions[] = "/";

    // 4. collapse "/segment/../"
    $patterns[] = "|/[^\/]+/\.\./|";
    $substitutions[] = "/";

    return preg_replace($patterns, $substitutions, $links);
}
|||
|
|||
/*======================================================================*\ |
|||
Function: _httprequest |
|||
Purpose: go get the http data from the server |
|||
Input: $url the url to fetch |
|||
$fp the current open file pointer |
|||
$URI the full URI |
|||
$body body contents to send if any (POST) |
|||
Output: |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Send an HTTP request over an open socket and read the response.
 *
 * Fills $this->headers, $this->status, $this->response_code and
 * $this->results, and records a redirect target in $this->_redirectaddr
 * (from a Location/URI header or a meta refresh tag) and frame sources in
 * $this->_frameurls.
 *
 * @param string   $url          request target for the request line ("/" when empty)
 * @param resource $fp           open socket, as produced by _connect()
 * @param string   $URI          the full URI being fetched
 * @param string   $http_method  request method, e.g. "GET" or "POST"
 * @param string   $content_type value for the Content-type header, if any
 * @param string   $body         body contents to send, if any (POST)
 * @return bool false when a read timed out (status is set to -100), true otherwise
 */
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if (empty($url))
        $url = "/";

    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if (!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if (!empty($this->host) && !isset($this->rawheaders['Host']))
        $headers .= "Host: ".$this->host."\r\n";
    if (!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";

    if ($this->use_gzip) {
        // make sure PHP was built with --with-zlib
        // and we can handle gzipp'ed data
        // (the function name must be passed as a string)
        if (function_exists('gzinflate')) {
            $headers .= "Accept-encoding: gzip\r\n";
        }
        else {
            trigger_error(
                "use_gzip is on, but PHP was built without zlib support.".
                " Requesting file(s) without gzip encoding.",
                E_USER_NOTICE);
        }
    }

    if (!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";

    if (!empty($this->cookies))
    {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal) {
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        }
        $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
    }

    if (!empty($this->rawheaders))
    {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }

    if (!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if (!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if (!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";

    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp, $headers.$body, strlen($headers.$body));

    $this->_redirectaddr = false;
    // reset to an empty array (not unset) so later code can always count()/iterate it
    $this->headers = array();

    // content was returned gzip encoded?
    $is_gzipped = false;

    while ($currentHeader = fgets($fp, $this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status = -100;
            return false;
        }

        // a blank line ends the header section
        if (preg_match("/^\r?\n$/", $currentHeader))
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Header names are case-insensitive, so detection and extraction
        // share one case-insensitive pattern.
        if (preg_match("/^(Location|URI):\s*(.*)/i", chop($currentHeader), $matches))
        {
            $target = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if (preg_match("|^HTTP/|", $currentHeader))
        {
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
            {
                $this->status = $status[1];
            }
            $this->response_code = $currentHeader;
        }

        if (preg_match("/Content-Encoding:\s*gzip/i", $currentHeader)) {
            $is_gzipped = true;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body; stop once more than maxlength bytes have been collected
    $results = "";
    while ($data = fread($fp, $this->maxlength)) {
        $results .= $data;
        if (strlen($results) > $this->maxlength) {
            break;
        }
    }

    // gunzip
    if ($is_gzipped) {
        // per http://www.php.net/manual/en/function.gzencode.php
        // skip the first 10 bytes, then inflate the remainder
        $results = substr($results, 10);
        $results = gzinflate($results);
    }

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status = -100;
        return false;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
    {
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|||
|
|||
/*======================================================================*\ |
|||
Function: _httpsrequest |
|||
Purpose: go get the https data from the server using curl |
|||
Input: $url the url to fetch |
|||
$URI the full URI |
|||
$body body contents to send if any (POST) |
|||
Output: |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Fetch an https document by running the external curl binary.
 *
 * Response headers are dumped by curl into a temporary file and parsed from
 * there. Fills $this->headers, $this->status, $this->response_code and
 * $this->results, and records redirect targets and frame sources.
 *
 * @param string $url          request target ("/" when empty); not passed to curl
 * @param string $URI          the full URI, handed to curl
 * @param string $http_method  request method; not passed to curl
 * @param string $content_type value for the Content-type header, if any
 * @param string $body         body contents to send with curl -d, if any (POST)
 * @return bool false when the temp file cannot be created or curl exits with
 *              a non-zero status (details in $this->error), true otherwise
 */
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    if ($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();

    $URI_PARTS = parse_url($URI);
    if (empty($url))
        $url = "/";
    // GET ... header not needed for curl
    if (!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if (!empty($this->host))
        $headers[] = "Host: ".$this->host;
    if (!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if (!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;

    if (!empty($this->cookies))
    {
        if (!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        $cookie_pairs = array();
        foreach ($this->cookies as $cookieKey => $cookieVal) {
            $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
        }
        $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
    }

    if (!empty($this->rawheaders))
    {
        if (!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        foreach ($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }

    if (!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if (!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if (!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    // Build the command line. Every value goes through escapeshellarg() as
    // its own argument: hand-quoting plus escapeshellcmd() corrupts values
    // such as "*/*" and does not stop a crafted URI or header from breaking
    // out of its quotes.
    $cmdline_params = "";
    foreach ($headers as $header) {
        $cmdline_params .= " -H ".escapeshellarg($header);
    }

    if (!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if ($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    # accept self-signed certs
    $cmdline_params .= " -k";

    // tempnam() creates a uniquely named file instead of a guessable /tmp name
    $headerfile = tempnam("/tmp", "snoopy");
    if ($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    exec(escapeshellarg($this->curl_path)." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI), $results, $return);

    if ($return)
    {
        // do not leave the header dump behind on failure
        if (file_exists($headerfile))
            unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n", $results);

    $result_headers = file($headerfile);
    if (file_exists($headerfile))
        unlink($headerfile);
    if ($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    // reset to an empty array (not unset) so later code can always count()/iterate it
    $this->headers = array();

    foreach ($result_headers as $headerLine)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // One case-insensitive pattern both detects the header and extracts
        // the target without leading whitespace.
        if (preg_match("/^(Location|URI):\s*(.*)/i", chop($headerLine), $matches))
        {
            $target = $matches[2];
            // look for :// in the Location header to see if hostname is included
            if (!preg_match("|\:\/\/|", $target))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if (!preg_match("|^/|", $target))
                    $this->_redirectaddr .= "/".$target;
                else
                    $this->_redirectaddr .= $target;
            }
            else
                $this->_redirectaddr = $target;
        }

        if (preg_match("|^HTTP/|", $headerLine))
        {
            $this->response_code = $headerLine;
            if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $this->response_code, $match))
            {
                $this->status = $match[1];
            }
        }

        $this->headers[] = $headerLine;
    }

    // check if there is a redirect meta tag
    if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
    {
        $this->results[] = $results;
        for ($x = 0; $x < count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif (is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
|||
|
|||
/*======================================================================*\ |
|||
Function: setcookies() |
|||
Purpose: set cookies for a redirection |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Copy cookies from the stored response headers into $this->cookies so they
 * are sent along with the next request (used when following a redirect).
 *
 * Only the name=value pair of each Set-Cookie header is used; attributes
 * after the first ";" are ignored.
 */
function setcookies()
{
    // nothing to scan when no response headers have been stored
    if (empty($this->headers) || !is_array($this->headers))
        return;

    foreach ($this->headers as $header)
    {
        // stored header lines keep their line ending, so the value stops at
        // ";" or at the end of the line, whichever comes first
        if (preg_match("/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i", $header, $match))
        {
            // values are urlencode()d again when sent, so store them decoded
            $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }
}
|||
|
|||
|
|||
/*======================================================================*\ |
|||
Function: _check_timeout |
|||
Purpose: checks whether timeout has occurred |
|||
Input: $fp file pointer |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Report whether the last read on the socket timed out.
 *
 * Only checks when a read timeout is configured; sets $this->timed_out
 * when a timeout is detected.
 *
 * @param resource $fp socket to inspect
 * @return bool true if the socket reports a timeout, false otherwise
 */
function _check_timeout($fp)
{
    // timeouts are disabled unless read_timeout is positive
    if (!($this->read_timeout > 0))
        return false;

    $socket_state = socket_get_status($fp);
    if (!$socket_state["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
|||
|
|||
/*======================================================================*\ |
|||
Function: _connect |
|||
Purpose: make a socket connection |
|||
Input: $fp file pointer |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Open a socket to the target host, or to the proxy when both proxy_host
 * and proxy_port are set (in which case $this->_isproxy is switched on).
 *
 * @param resource $fp receives the open socket (passed by reference)
 * @return bool true on success; false on failure, with $this->status set to
 *              the error number and $this->error to a description
 */
function _connect(&$fp)
{
    if (!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;
        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    if ($fp = fsockopen(
                $host,
                $port,
                $errno,
                $errstr,
                $this->_fp_timeout
                ))
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch ($errno)
    {
        // each case needs its own break, otherwise every failure falls
        // through to the generic default message
        case -3:
            $this->error = "socket creation failed (-3)";
            break;
        case -4:
            $this->error = "dns lookup failure (-4)";
            break;
        case -5:
            $this->error = "connection refused or timed out (-5)";
            break;
        default:
            $this->error = "connection failed (".$errno.")";
    }
    return false;
}
|||
/*======================================================================*\ |
|||
Function: _disconnect |
|||
Purpose: disconnect a socket connection |
|||
Input: $fp file pointer |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Close a socket connection.
 *
 * @param resource $fp the open file pointer
 * @return bool the result of fclose()
 */
function _disconnect($fp)
{
    $closed = fclose($fp);
    return $closed;
}
|||
|
|||
|
|||
/*======================================================================*\ |
|||
Function: _prepare_post_body |
|||
Purpose: Prepare post body according to encoding type |
|||
Input: $formvars - form variables |
|||
$formfiles - form upload files |
|||
Output: post body |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Build the request body for a form submission.
 *
 * The encoding is selected by $this->_submit_type:
 *  - "application/x-www-form-urlencoded": key=value pairs joined by "&"
 *    (array/object values are sent as key[]=value, one pair per element);
 *  - "multipart/form-data": one MIME part per variable and per readable
 *    upload file, delimited by a freshly generated $this->_mime_boundary.
 * Any other submit type yields an empty body.
 *
 * @param mixed $formvars   form variables (cast to array)
 * @param mixed $formfiles  field name => file name(s) to upload (cast to array)
 * @return string           the encoded post body, '' when there is nothing to send
 */
function _prepare_post_body($formvars, $formfiles)
{
    settype($formvars, "array");
    settype($formfiles, "array");

    // Initialise explicitly: the body is built with .= below.
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return $postdata;

    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            // foreach replaces the each()/list() idiom, which was removed in PHP 8.
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else {
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // Field name is "key[]". The previous "$key\[\]" literal
                        // emitted the backslashes verbatim, because "\[" is not
                        // an escape sequence in a double-quoted string.
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                foreach ((array) $file_names as $file_name) {
                    if (!is_readable($file_name)) continue;

                    // file_get_contents() copes with empty files and reads in
                    // binary mode; skip the file if it cannot be read after all.
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;

                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}
|||
} |
|||
|
|||
?> |
@ -1,237 +0,0 @@ |
|||
<html> |
|||
<head> |
|||
<title>Magpie RSS Recipes: Simple PHP RSS How To</title> |
|||
<style> |
|||
body { |
|||
font-family:trebuchet MS, trebuchet, verdana, arial, sans-serif; |
|||
font-size: 11px; |
|||
|
|||
} |
|||
|
|||
pre { font-family: "Courier New", monospace; |
|||
padding: 1em; |
|||
margin: 0.2em 2.5em 0.2em 3em; |
|||
background-color: #efeff5; |
|||
border: 1px solid #cfcfcf; |
|||
white-space: pre; |
|||
} |
|||
|
|||
</style> |
|||
</head> |
|||
<body> |
|||
<p> |
|||
<h1>MagpieRSS Recipes: Cooking with Corbies</h1> |
|||
|
|||
<div align="center"><h3><em>"Four and twenty blackbirds baked in a |
|||
pie."</em></h3></div> |
|||
</p> |
|||
<p> |
|||
<ol> |
|||
<li><a href="#limit">Limit the Number of Headlines(aka Items) Returned</a></li> |
|||
<li><a href="#error_message">Display a Custom Error Message if Something Goes |
|||
Wrong</a></li> |
|||
<li><a href="#write_rss">Generate a New RSS Feed</a></li> |
|||
<li><a href="#by_date">Display Headlines More Recent than X Date</a></li> |
|||
<li><a href="#from_file">Parse a Local File Containing RSS</a></li> |
|||
|
|||
</ol> |
|||
</p> |
|||
|
|||
<a name="limit"></a><h2>1. Limit the Number of Headlines(aka Items) Returned.</h2> |
|||
|
|||
<h3>Problem:</h3> |
|||
|
|||
You want to display the 10 (or 3 or whatever) most recent headlines, but the RSS feed |
|||
contains 15. |
|||
|
|||
<h3>Solution:</h3> |
|||
|
|||
<pre> |
|||
$num_items = 10; |
|||
$rss = fetch_rss($url); |
|||
|
|||
$items = array_slice($rss->items, 0, $num_items); |
|||
|
|||
foreach ( $items as $item ) { |
|||
</pre> |
|||
<h3>Discussion:</h3> |
|||
|
|||
Rather than trying to limit the number of items Magpie parses, a much simpler, |
|||
and more flexible approach is to take a "slice" of the array of items. And |
|||
array_slice() is smart enough to do the right thing if the feed has fewer items |
|||
than $num_items. |
|||
|
|||
<h3>See:</h3> <a href="http://www.php.net/array_slice">http://www.php.net/array_slice</a> |
|||
</p> |
|||
|
|||
<a name="error_message"></a><h2>2. Display a Custom Error Message if Something Goes Wrong</h2> |
|||
|
|||
<h3>Problem:</h3> |
|||
|
|||
You don't want Magpie's error messages showing up if something goes wrong. |
|||
|
|||
<h3>Solution:</h3> |
|||
<pre> |
|||
# Magpie throws USER_WARNINGS only |
|||
# so you can cloak these, by only showing ERRORs |
|||
error_reporting(E_ERROR); |
|||
|
|||
# check the return value of fetch_rss() |
|||
|
|||
$rss = fetch_rss($url); |
|||
|
|||
if ( $rss ) { |
|||
...display rss feed... |
|||
} |
|||
else { |
|||
echo "An error occured! " . |
|||
"Consider donating more $$$ for restoration of services." . |
|||
"<br>Error Message: " . magpie_error(); |
|||
} |
|||
</pre> |
|||
<h3>Discussion:</h3> |
|||
|
|||
MagpieRSS triggers a warning in a number of circumstances. The 2 most common |
|||
circumstances are: if the specified RSS file isn't properly formed (usually |
|||
because it includes illegal HTML), or if Magpie can't download the remote RSS |
|||
file, and there is no cached version. |
|||
|
|||
If you don't want your users to see these warnings change your error_reporting |
|||
settings to only display ERRORs.<br /> |
|||
Another option is to turn off display_error, |
|||
so that WARNINGs, and NOTICEs still go to the error_log but not to the webpages. |
|||
|
|||
You can do this with: |
|||
|
|||
<pre> |
|||
# you can also do this in your php.ini file |
|||
ini_set('display_errors', 0); |
|||
</pre> |
|||
|
|||
<h3>See:</h3> |
|||
<a |
|||
href="http://www.php.net/error_reporting">http://www.php.net/error_reporting</a>,<br |
|||
/> |
|||
<a href="http://www.php.net/ini_set">http://www.php.net/ini_set</a>, <br /> |
|||
<a |
|||
href="http://www.php.net/manual/en/ref.errorfunc.php">http://www.php.net/manual/en/ref.errorfunc.php</a><br |
|||
/> |
|||
|
|||
<a name="write_rss"></a><h2>3. Generate a New RSS Feed</h2> |
|||
|
|||
<h3>Problem:</h3> |
|||
|
|||
Create an RSS feed for other people to use. |
|||
|
|||
<h3>Solution:</h3> |
|||
|
|||
Use Useful Inc's <a href="http://usefulinc.com/rss/rsswriter/">RSSWriter</a>. |
|||
|
|||
<h3>Discussion:</h3> |
|||
|
|||
An example of turning a Magpie parsed RSS object back into an RSS file is |
|||
forthcoming. In the meantime RSSWriter is well documented. |
|||
|
|||
<a name="by_date"></a><h2>4. Display Headlines More Recent than X Date</h2> |
|||
|
|||
<h3>Problem:</h3> |
|||
|
|||
You only want to display headlines that were published on, or after a certain |
|||
date. |
|||
|
|||
|
|||
<h3>Solution:</h3> |
|||
<pre> |
|||
require_once('rss_utils.inc'); |
|||
|
|||
# get all headlines published today |
|||
$today = getdate(); |
|||
|
|||
# today, 12AM |
|||
$date = mktime(0,0,0,$today['mon'], $today['mday'], $today['year']); |
|||
|
|||
$rss = fetch_rss($url); |
|||
|
|||
foreach ( $rss->items as $item ) { |
|||
$published = parse_w3cdtf($item['dc']['date']); |
|||
if ( $published >= $date ) { |
|||
echo "Title: " . $item['title']; |
|||
echo "Published: " . date("h:i:s A", $published); |
|||
echo "<p>"; |
|||
} |
|||
} |
|||
</pre> |
|||
<h3>Discussion:</h3> |
|||
|
|||
This recipe only works for RSS 1.0 feeds that include the <dc:date> field. |
|||
(which is very good RSS style) <br /> |
|||
<code>parse_w3cdtf()</code> is defined in |
|||
<code>rss_utils.inc</code>, and parses RSS style dates into Unix epoch |
|||
seconds. |
|||
|
|||
<h3>See: </h3> |
|||
<a |
|||
href="http://www.php.net/manual/en/ref.datetime.php">http://www.php.net/manual/en/ref.datetime.php</a> |
|||
|
|||
<a name="from_file"></a> |
|||
<h2>5. Parse a Local File Containing RSS</h2> |
|||
<h3>Problem:</h3> |
|||
MagpieRSS provides <code>fetch_rss()</code> which takes a URL and returns a |
|||
parsed RSS object, but what if you want to parse a file stored locally that |
|||
doesn't have a URL? |
|||
|
|||
<h3>Solution</h3> |
|||
<pre> |
|||
require_once('rss_parse.inc'); |
|||
|
|||
$rss_file = 'some_rss_file.rdf'; |
|||
$rss_string = read_file($rss_file); |
|||
$rss = new MagpieRSS( $rss_string ); |
|||
|
|||
if ( $rss and !$rss->ERROR) { |
|||
...display rss... |
|||
} |
|||
else { |
|||
echo "Error: " . $rss->ERROR; |
|||
} |
|||
|
|||
# efficiently read a file into a string |
|||
# in php >= 4.3.0 you can simply use file_get_contents() |
|||
# |
|||
function read_file($filename) { |
|||
$fh = fopen($filename, 'r') or die($php_errormsg); |
|||
$rss_string = fread($fh, filesize($filename) ); |
|||
fclose($fh); |
|||
return $rss_string; |
|||
} |
|||
</pre> |
|||
|
|||
<h3>Discussion</h3> |
|||
Here we are using MagpieRSS's RSS parser directly without the convenience wrapper |
|||
of <code>fetch_rss()</code>. We read the contents of the RSS file into a |
|||
string, and pass it to the parser constructor. Notice also that error handling |
|||
is subtly different. |
|||
|
|||
<h3>See: </h3> |
|||
<a |
|||
href="http://www.php.net/manual/en/ref.filesystem.php">http://www.php.net/manual/en/ref.filesystem.php</a>,<br |
|||
/> |
|||
<a |
|||
href="http://www.php.net/manual/en/language.oop.php">http://www.php.net/manual/en/language.oop.php</a> |
|||
|
|||
<!-- |
|||
<a name="link"></a><h2>#. Recipe</h2> |
|||
<h3>Problem:</h3> |
|||
Problem description |
|||
<h3>Solution</h3> |
|||
<pre> |
|||
code |
|||
</pre> |
|||
<h3>Discussion</h3> |
|||
Discuss code |
|||
<h3>See: </h3> |
|||
Documentation links: |
|||
--> |
|||
|
|||
</body> |
|||
</html> |
@ -1,419 +0,0 @@ |
|||
<html> |
|||
<head> |
|||
<title>Magpie RSS - PHP RSS Parser</title> |
|||
<link rel="alternate" type="application/rss+xml" title="RSS" |
|||
href="http://laughingmeme.org/magpierss.rdf" /> |
|||
<style> |
|||
body { |
|||
font-family:trebuchet MS, trebuchet, verdana, arial, sans-serif; |
|||
font-size: 11px; |
|||
|
|||
} |
|||
|
|||
pre { font-family: "Courier New", monospace; |
|||
padding: 1em; |
|||
margin: 0.2em 2.5em 0.2em 3em; |
|||
background-color: #efeff5; |
|||
border: 1px solid #cfcfcf; |
|||
white-space: pre; |
|||
} |
|||
|
|||
li.news { |
|||
padding-bottom:15px; |
|||
} |
|||
|
|||
a.nav { color: #FFFFFF; } |
|||
|
|||
div.nav { |
|||
width: 2in; |
|||
float: right; |
|||
border: 2px solid #cfcfcf; |
|||
padding: 5px; |
|||
background-color: #996699; |
|||
} |
|||
|
|||
</style> |
|||
</head> |
|||
<body> |
|||
<img src="magpie-photo.jpg"> |
|||
<h1>MagpieRSS</h1> |
|||
<p> |
|||
<h2>MagpieRSS provides an XML-based (expat) RSS parser in PHP.</h2> |
|||
<p> |
|||
MagpieRSS is compatible with RSS .9 through RSS 1.0, and supports the |
|||
RSS 1.0's modules. (with a few exceptions) |
|||
<p> |
|||
<div class="nav"> |
|||
<center><h3>Project Info</h3></center> |
|||
<ul> |
|||
<li><a class="nav" |
|||
href="http://sourceforge.net/project/showfiles.php?group_id=55691">Download |
|||
Magpie</a></li> |
|||
<li><a class="nav" |
|||
href="http://sourceforge.net/mail/?group_id=55691">Mailing |
|||
Lists</a></li> |
|||
<li><a class="nav" href="#news">News!</a></li> |
|||
<li><a class="nav" href="#why">Why?</a></li> |
|||
<li><a class="nav" href="#features">Features</a></li> |
|||
<li><a class="nav" href="#philosophy">Philosophy</a></li> |
|||
<li><a class="nav" href="#usage">Usage Examples</a></li> |
|||
<li><a class="nav" href="/cookbook.html">Cookbook</a></li> |
|||
<li><a class="nav" href="#todo">Todo</a></li> |
|||
<li style="list-style: none; padding-top: 5px;"><a title="Keep up on MagpieRSS news via RSS" href="http://laughingmeme.org/magpierss.rdf"><img |
|||
src="http://magpierss.sf.net/black_grey_magpie_news.gif" border="0"></a></li> |
|||
</ul> |
|||
</div> |
|||
<a name="news"></a> |
|||
<h3>News!</h3> |
|||
<ul> |
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://sourceforge.net/project/showfiles.php?group_id=55691">MagpieRSS |
|||
0.51 Released</a> |
|||
<ul> |
|||
<li> important bugfix!</li> |
|||
<li> fix <a href="http://laughingmeme.org/archives/000811.html |
|||
">"silent failure"</a> when PHP doesn't have zlib</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
<li class="news"> |
|||
<a href="http://minutillo.com/steve/feedonfeeds/">Feed On Feeds Uses Magpie</a> |
|||
<ul> |
|||
<li> server based PHP RSS aggregator built with MagpieRSS</li> |
|||
<li> easy to install, easy to use.</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=158897">MagpieRSS |
|||
0.5 Released</a> |
|||
<ul> |
|||
<li> supports transparent HTTP gzip content negotiation for reduced bandwidth usage</li> |
|||
<li> quashed some undefined index notices</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=139643">MagpieRSS |
|||
0.46 Released</a> |
|||
<ul> |
|||
<li> minor release, more error handling clean up</li> |
|||
<li> documentation fixes, simpler example</li> |
|||
<li> new <a href="/TROUBLESHOOTING">troubleshooting</a> guide for installation and usage problems</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://laughingmeme.org/magpierss.rdf">Magpie News as RSS</a> |
|||
<ul> |
|||
<li> releases, bug fixes, related stories as an RSS feed</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://magpierss.sourceforge.net/cookbook.html">MagpieRSS |
|||
Cookbook: Simple PHP RSS How Tos</a> |
|||
<ul> |
|||
<li> answers some of the most frequently asked Magpie questions</li> |
|||
<li> feedback, suggestions, requests, recipes welcome</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
<li class="news"> |
|||
<a href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=134850">MagpieRSS 0.4 Released!</a> |
|||
<ul> |
|||
<li> improved error handling, more flexibility for script authors, |
|||
backwards compatible</li> |
|||
<li> new and better examples! including using MagpieRSS and <a |
|||
href="http://smarty.php.net">Smarty</a></li> |
|||
<li> new Smarty plugin for RSS date parsing</li> |
|||
</ul> |
|||
<br /> |
|||
</li> |
|||
<!-- |
|||
<li class="news"> |
|||
<a href="http://www.infinitepenguins.net/rss/">Infinite Penguin now |
|||
supports Magpie 0.3</a> |
|||
<ul> |
|||
<li> simple, sophisticated RSS viewer</li> |
|||
<li> includes auto-generated javascript ticker from RSS feed</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://traumwind.tierpfad.de/blog/magpie/magpie_alike.php">Traumwind |
|||
releases REX backend for MagpieRSS</a> |
|||
<ul> |
|||
<li>drop in support using regex based XML parser</li> |
|||
<li>parses improperly formed XML that chokes expat</li> |
|||
</ul> |
|||
|
|||
</li> |
|||
|
|||
<li class="news"> |
|||
<a |
|||
href="http://sourceforge.net/project/showfiles.php?group_id=55691&release_id=118652"> |
|||
MagpieRSS 0.3 Released!</a> |
|||
<ul> |
|||
<li>Support added for |
|||
<a href="http://fishbowl.pastiche.org/archives/001132.html">HTTP |
|||
Conditional GETs</a>.</li> |
|||
<li>See <a href="http://sourceforge.net/project/shownotes.php?group_id=55691&release_id=118652">ChangeLog</a> |
|||
for more info.</li> |
|||
</ul> |
|||
</li> |
|||
<li class="news">MagpieRSS 0.2!</a> |
|||
<ul> |
|||
<li>Major clean up of the code. Easier to use.</li> |
|||
<li>Simpler install on shared hosts.</li> |
|||
<li>Better documentation and comments.</li> |
|||
</ul> |
|||
</li> |
|||
<li class="news">We've <a href="http://sourceforge.net/projects/magpierss/">moved to |
|||
Sourceforge!</a></li> |
|||
--> |
|||
</ul> |
|||
</p> |
|||
<p> |
|||
<a name="why"></a> |
|||
<h3>Why?</h3> |
|||
I wrote MagpieRSS out of a frustration with the limitations of existing |
|||
solutions. In particular many of the existing PHP solutions seemed to: |
|||
<ul> |
|||
<li>use a parser based on regular expressions, making for an inherently |
|||
fragile solution |
|||
<li>only support early versions of RSS |
|||
<li>discard all the interesting information besides item title, description, |
|||
and link. |
|||
<li>not build proper separation between parsing the RSS and displaying it. |
|||
</ul> |
|||
In particular I failed to find any PHP RSS parsers that could sufficiently |
|||
parse RSS 1.0 feeds, to be useful on the RSS based event feeds we generate |
|||
at <a href="http://protest.net">Protest.net</a>. |
|||
</p> |
|||
<p> |
|||
<a name="features"></a> |
|||
<h3>Features</h3> |
|||
|
|||
<ul> |
|||
<li class="toplevel"> |
|||
<h4>Easy to Use</h4> |
|||
As simple as: |
|||
<pre> |
|||
require('rss_fetch.inc'); |
|||
$rss = fetch_rss($url); |
|||
</pre> |
|||
|
|||
</li> |
|||
<li class="toplevel"> |
|||
<h4>Parses RSS 0.9 - RSS 1.0</h4> |
|||
|
|||
Parses most RSS formats, including support for |
|||
<a href="http://www.purl.org/rss/1.0/modules/">1.0 modules</a> and limited |
|||
namespace support. RSS is packed into convenient data structures; easy to |
|||
use in PHP, and appropriate for passing to a templating system, like |
|||
<a href="http://smarty.php.net">Smarty</a>. |
|||
</li> |
|||
<li> |
|||
<h4>Integrated Object Cache</h4> |
|||
|
|||
Caching the parsed RSS means that the 2nd request is fast, and that |
|||
including the rss_fetch call in your PHP page won't destroy your performance, |
|||
and force you to rely on an external cron job. And it happens transparently. |
|||
|
|||
</li> |
|||
<li> |
|||
<h4>HTTP Conditional GETs</h4> |
|||
|
|||
Save bandwidth and speed up download times with intelligent use of |
|||
Last-Modified and ETag.<br /> See <a |
|||
href="http://fishbowl.pastiche.org/archives/001132.html">HTTP Conditional Get for RSS Hackers</a> |
|||
</li> |
|||
|
|||
<li><h4>Configurable</h4> |
|||
|
|||
Makes extensive use of constants to allow overriding default behaviour, and |
|||
installation on shared hosts. |
|||
</li> |
|||
<li><h4>Modular</h4> |
|||
<ul> |
|||
<li>rss_fetch.inc - wraps a simple interface (<code>fetch_rss()</code>) |
|||
around the library. |
|||
<li>rss_parse.inc - provides the RSS parser, and the RSS object |
|||
<li>rss_cache.inc - a simple (no GC) object cache, optimized for RSS objects |
|||
<li>rss_utils.inc - utility functions for working with RSS. currently |
|||
provides <code>parse_w3cdtf()</code>, for parsing <a |
|||
href="http://www.w3.org/TR/NOTE-datetime">W3CDTF</a> into epoch seconds. |
|||
</ul> |
|||
</ul> |
|||
|
|||
|
|||
</p> |
|||
<p> |
|||
<a name="philosophy"></a> |
|||
<h3>Magpie's approach to parsing RSS</h3> |
|||
|
|||
Magpie takes a naive, and inclusive approach. Absolutely |
|||
non-validating, as long as the RSS feed is well formed, Magpie will |
|||
cheerfully parse new, and never before seen tags in your RSS feeds. |
|||
</p> |
|||
<p> |
|||
This makes it very simple to support the varied versions of RSS, but |
|||
forces the consumer of a RSS feed to be cognizant of how it is |
|||
structured.(at least if you want to do something fancy) |
|||
</p> |
|||
<p> |
|||
Magpie parses a RSS feed into a simple object, with 4 fields: |
|||
<code>channel</code>, <code>items</code>, <code>image</code>, and |
|||
<code>textinput</code>. |
|||
</p> |
|||
<p> |
|||
<h4>channel</h4> |
|||
<code>$rss->channel</code> contains key-value pairs of all tags, without |
|||
nested tags, found between the root tag (<rdf:RDF>, or <rss>) |
|||
and the end of the document. |
|||
</p> |
|||
<p> |
|||
<h4>items</h4> |
|||
<code>$rss->items</code> is an array of associative arrays, each one |
|||
describing a single item. An example that looks like: |
|||
<pre> |
|||
<item rdf:about="http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257"> |
|||
<title>Weekly Peace Vigil</title> |
|||
<link>http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257</link> |
|||
<description>Wear a white ribbon</description> |
|||
<dc:subject>Peace</dc:subject> |
|||
<ev:startdate>2002-06-01T11:00:00</ev:startdate> |
|||
<ev:location>Northampton, MA</ev:location> |
|||
<ev:enddate>2002-06-01T12:00:00</ev:enddate> |
|||
<ev:type>Protest</ev:type> |
|||
</item> |
|||
</pre><p> |
|||
Is parsed, and pushed on the <code>$rss->items</code> array as: |
|||
<p><pre> |
|||
array( |
|||
title => 'Weekly Peace Vigil', |
|||
link => 'http://protest.net/NorthEast/calendrome.cgi?span=event&ID=210257', |
|||
description => 'Wear a white ribbon', |
|||
dc => array ( |
|||
subject => 'Peace' |
|||
), |
|||
ev => array ( |
|||
startdate => '2002-06-01T11:00:00', |
|||
enddate => '2002-06-01T12:00:00', |
|||
type => 'Protest', |
|||
location => 'Northampton, MA' |
|||
) |
|||
); |
|||
</pre> |
|||
</p> |
|||
<p> |
|||
<h4>image and textinput</h4> |
|||
<code>$rss->image</code> and <code>$rss->textinput</code> are associative arrays |
|||
including name-value pairs for anything found between the respective parent |
|||
tags. |
|||
</p> |
|||
<p> |
|||
<a name="usage"></a> |
|||
<h3>Usage Examples:</h3> |
|||
|
|||
A very simple example would be: |
|||
<pre> |
|||
require_once 'rss_fetch.inc'; |
|||
|
|||
$url = 'http://magpie.sf.net/samples/imc.1-0.rdf'; |
|||
$rss = fetch_rss($url); |
|||
|
|||
echo "Site: ", $rss->channel['title'], "<br>\n"; |
|||
foreach ($rss->items as $item ) { |
|||
$title = $item[title]; |
|||
$url = $item[link]; |
|||
echo "<a href=$url>$title</a></li><br>\n"; |
|||
} |
|||
</pre> |
|||
More soon....in the meantime you can check out a |
|||
<a href="http://www.infinitepenguins.net/rss/">cool tool built with |
|||
MagpieRSS</a>, version 0.1. |
|||
</p> |
|||
<p> |
|||
<a name="todo"></a> |
|||
<h3>Todos</h3> |
|||
<h4>RSS Parser</h4> |
|||
<ul> |
|||
<li>Swap in a smarter parser that includes optional |
|||
support for validation, and required fields.</li> |
|||
|
|||
<li>Support RSS 2.0 (as much as I'm annoyed by it)</li> |
|||
|
|||
<li>Improve support for modules that rely on attributes</li> |
|||
</ul> |
|||
|
|||
<h4>RSS Cache</h4> |
|||
<ul> |
|||
<li>Light-weight garbage collection |
|||
</ul> |
|||
|
|||
<h4>Fetch RSS</h4> |
|||
<ul> |
|||
<li>Attempt to <a |
|||
href="http://diveintomark.org/archives/2002/08/15.html">auto-detect an |
|||
RSS feed</a>, given a URL following, much like <a |
|||
href="http://diveintomark.org/projects/misc/rssfinder.py.txt">rssfinder.py</a>does. |
|||
</li> |
|||
</ul> |
|||
<h4>Misc</h4> |
|||
<ul> |
|||
<li>More examples</li> |
|||
<li>A test suite</li> |
|||
<li>RSS generation, perhaps with <a |
|||
href="http://usefulinc.com/rss/rsswriter/">RSSwriter</a>? |
|||
</li> |
|||
</ul> |
|||
|
|||
</p> |
|||
<p> |
|||
<h3>RSS Resources</h3> |
|||
<ul> |
|||
<li><a href="http://mnot.net/rss/tutorial/">RSS Tutorial for Content Publishers |
|||
and Webmasters</a> is a great place to start. |
|||
<li><a href="http://gils.utah.gov/rss/">RSS Workshop: Publish and Syndicate |
|||
Your News to the Web</a> is also a good introduction</li> |
|||
<li><a href="http://www.disobey.com/amphetadesk/finding_more.html">Finding |
|||
More Channels</a> on how to find RSS feeds. |
|||
<li>Hammersley's <a href="http://rss.benhammersley.com/">Content Syndication |
|||
with XML and RSS</a> is a blog covering RSS current events. |
|||
<li><a href="http://groups.yahoo.com/group/rss-dev/">RSS-DEV mailing |
|||
list</a> is generally a very helpful, informative space, with the occasional |
|||
heated debate |
|||
<li><a href="http://feeds.archive.org/validator/">RSS Validator</a> |
|||
</ul>. |
|||
</p> |
|||
<h3>License and Contact Info</h3> |
|||
Magpie is distributed under the GPL license... |
|||
<p> |
|||
coded by: kellan (at) protest.net, feedback is always appreciated. |
|||
<p> |
|||
<a href="http://sourceforge.net"><img |
|||
src="http://sourceforge.net/sflogo.php?group_id=55691&type=3" |
|||
width="125" height="37" border="0" alt="SourceForge.net Logo"></a> |
|||
<img src="http://laughingmeme.org/magpie_views.gif"> |
|||
</body> |
|||
</html> |
@ -1,200 +0,0 @@ |
|||
<?php |
|||
/* |
|||
* Project: MagpieRSS: a simple RSS integration tool |
|||
* File: rss_cache.inc, a simple, rolling(no GC), cache |
|||
* for RSS objects, keyed on URL. |
|||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
|||
* Version: 0.51 |
|||
* License: GPL |
|||
* |
|||
* The latest version of MagpieRSS can be obtained from: |
|||
* http://magpierss.sourceforge.net |
|||
* |
|||
* For questions, help, comments, discussion, etc., please join the |
|||
* Magpie mailing list: |
|||
* http://lists.sourceforge.net/lists/listinfo/magpierss-general |
|||
* |
|||
*/ |
|||
|
|||
/**
 * Simple file-backed cache for RSS objects, keyed on the feed URL.
 *
 * Each entry is stored as a serialized object in BASE_CACHE, in a file
 * named after the md5 hash of the URL. Freshness is derived from the
 * file's modification time compared with MAX_AGE. Nothing is ever
 * garbage-collected.
 *
 * The last error message is kept in $this->ERROR; it is reset at the start
 * of set(), get() and check_cache().
 */
class RSSCache {
    var $BASE_CACHE = './cache';    // where the cache files are stored
    var $MAX_AGE    = 3600;         // when are files stale, default one hour
    var $ERROR      = "";           // accumulate error messages

    /**
     * Configure the cache and try to create the cache directory.
     *
     * @param string $base  cache directory; empty keeps the default
     * @param mixed  $age   maximum age in seconds; empty keeps the default
     */
    function __construct($base = '', $age = '') {
        if ( $base ) {
            $this->BASE_CACHE = $base;
        }
        if ( $age ) {
            $this->MAX_AGE = $age;
        }

        // attempt to make the cache directory
        if ( ! file_exists( $this->BASE_CACHE ) ) {
            $status = @mkdir( $this->BASE_CACHE, 0755 );

            // if make failed
            if ( ! $status ) {
                $this->error(
                    "Cache couldn't make dir '" . $this->BASE_CACHE . "'."
                );
            }
        }
    }

    /**
     * PHP 4-style constructor, kept so existing explicit calls still work.
     * PHP 8 no longer treats a method named after the class as the
     * constructor, hence the real work lives in __construct().
     */
    function RSSCache ($base = '', $age = '') {
        $this->__construct( $base, $age );
    }

/*=======================================================================*\
    Function:   set
    Purpose:    add an item to the cache, keyed on url
    Input:      url from which the rss file was fetched, and the object
                to store
    Output:     path of the cache file on success, 0 on failure
\*=======================================================================*/
    function set ($url, $rss) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );
        $fp = @fopen( $cache_file, 'w' );

        if ( ! $fp ) {
            $this->error(
                "Cache unable to open file for writing: $cache_file"
            );
            return 0;
        }

        $data = $this->serialize( $rss );
        fwrite( $fp, $data );
        fclose( $fp );

        return $cache_file;
    }

/*=======================================================================*\
    Function:   get
    Purpose:    fetch an item from the cache
    Input:      url from which the rss file was fetched
    Output:     cached object on HIT, 0 on MISS (missing, unreadable or
                empty cache file)
\*=======================================================================*/
    function get ($url) {
        $this->ERROR = "";
        $cache_file = $this->file_name( $url );

        if ( ! file_exists( $cache_file ) ) {
            $this->debug(
                "Cache doesn't contain: $url (cache file: $cache_file)"
            );
            return 0;
        }

        $fp = @fopen($cache_file, 'r');
        if ( ! $fp ) {
            $this->error(
                "Failed to open cache file for reading: $cache_file"
            );
            return 0;
        }

        $filesize = filesize( $cache_file );
        if ( ! $filesize ) {
            // empty cache file: nothing to unserialize
            fclose( $fp );
            return 0;
        }

        $data = fread( $fp, $filesize );
        // the handle used to be left open here
        fclose( $fp );

        return $this->unserialize( $data );
    }

/*=======================================================================*\
    Function:   check_cache
    Purpose:    check a url for membership in the cache
                and whether the object is older than MAX_AGE (ie. STALE)
    Input:      url from which the rss file was fetched
    Output:     'HIT' if cached and current, 'STALE' if cached but old,
                'MISS' if not cached
\*=======================================================================*/
    function check_cache ( $url ) {
        $this->ERROR = "";
        $filename = $this->file_name( $url );

        if ( ! file_exists( $filename ) ) {
            // object does not exist
            return 'MISS';
        }

        // find how long ago the file was added to the cache
        // and whether that is longer than MAX_AGE
        $mtime = filemtime( $filename );
        $age = time() - $mtime;

        if ( $this->MAX_AGE > $age ) {
            // object exists and is current
            return 'HIT';
        }

        // object exists but is old
        return 'STALE';
    }

/*=======================================================================*\
    Function:   cache_age
    Purpose:    report how old a cache entry is
    Input:      cache key (the url the rss file was fetched from)
    Output:     age in seconds, -1 if there is no such entry
\*=======================================================================*/
    function cache_age( $cache_key ) {
        // Previously this looked up an undefined $url instead of the
        // parameter, so it never found the requested entry.
        $filename = $this->file_name( $cache_key );

        if ( file_exists( $filename ) ) {
            $mtime = filemtime( $filename );
            $age = time() - $mtime;
            return $age;
        }

        return -1;
    }

/*=======================================================================*\
    Function:   serialize
\*=======================================================================*/
    function serialize ( $rss ) {
        return serialize( $rss );
    }

/*=======================================================================*\
    Function:   unserialize
\*=======================================================================*/
    function unserialize ( $data ) {
        // NOTE(review): unserialize() is only safe while the cache
        // directory cannot be written to by untrusted parties.
        return unserialize( $data );
    }

/*=======================================================================*\
    Function:   file_name
    Purpose:    map url to location in cache
    Input:      url from which the rss file was fetched
    Output:     a file name
\*=======================================================================*/
    function file_name ($url) {
        $filename = md5( $url );
        return join( DIRECTORY_SEPARATOR, array( $this->BASE_CACHE, $filename ) );
    }

/*=======================================================================*\
    Function:   error
    Purpose:    register error
\*=======================================================================*/
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        // NOTE(review): $php_errormsg is only populated in the scope where
        // the error was raised and no longer exists in PHP 8, so this
        // branch probably never fires here.
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        $this->ERROR = $errormsg;

        // MAGPIE_DEBUG is defined outside this class; treat "undefined"
        // as off rather than failing on the unknown constant.
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }
    }

    function debug ($debugmsg, $lvl=E_USER_NOTICE) {
        if ( defined('MAGPIE_DEBUG') && MAGPIE_DEBUG ) {
            $this->error("MagpieRSS [debug] $debugmsg", $lvl);
        }
    }

}
|||
|
|||
?> |
@ -1,460 +0,0 @@ |
|||
<?php |
|||
/* |
|||
* Project: MagpieRSS: a simple RSS integration tool |
|||
* File: rss_fetch.inc, a simple functional interface |
|||
to fetching and parsing RSS files, via the |
|||
function fetch_rss() |
|||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
|||
* License: GPL |
|||
* |
|||
* The latest version of MagpieRSS can be obtained from: |
|||
* http://magpierss.sourceforge.net |
|||
* |
|||
* For questions, help, comments, discussion, etc., please join the |
|||
* Magpie mailing list: |
|||
* magpierss-general@lists.sourceforge.net |
|||
* |
|||
*/ |
|||
|
|||
// Setup MAGPIE_DIR for use on hosts that don't include |
|||
// the current path in include_path. |
|||
// with thanks to rajiv and smarty |
|||
if (!defined('DIR_SEP')) { |
|||
define('DIR_SEP', DIRECTORY_SEPARATOR); |
|||
} |
|||
|
|||
if (!defined('MAGPIE_DIR')) { |
|||
define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP); |
|||
} |
|||
|
|||
require_once( MAGPIE_DIR . 'rss_parse.inc' ); |
|||
require_once( MAGPIE_DIR . 'rss_cache.inc' ); |
|||
|
|||
// for including 3rd party libraries |
|||
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP); |
|||
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc'); |
|||
|
|||
|
|||
/* |
|||
* CONSTANTS - redefine these in your script to change the |
|||
* behaviour of fetch_rss(). Currently, most options affect the cache |
|||
* |
|||
* MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? |
|||
* For me a built in cache was essential to creating a "PHP-like" |
|||
* feel to Magpie, see rss_cache.inc for rationale |
|||
* |
|||
* |
|||
* MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects? |
|||
* This should be a location that the webserver can write to. If this |
|||
* directory does not already exist Magpie will try to be smart and create |
|||
* it. This will often fail for permissions reasons. |
|||
* |
|||
* |
|||
* MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds. |
|||
* |
|||
* |
|||
* MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error |
|||
* instead of returning stale object? |
|||
* |
|||
* MAGPIE_DEBUG - Display debugging notices? |
|||
* |
|||
*/ |
|||
|
|||
|
|||
/*=======================================================================*\ |
|||
Function: fetch_rss: |
|||
Purpose: return RSS object for the given url |
|||
maintain the cache |
|||
Input: url of RSS file |
|||
Output: parsed RSS object (see rss_parse.inc) |
|||
|
|||
NOTES ON CACHEING: |
|||
If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache. |
|||
|
|||
NOTES ON RETRIEVING REMOTE FILES: |
|||
If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will |
|||
return a cached object, and touch the cache object upon receiving a |
|||
304. |
|||
|
|||
NOTES ON FAILED REQUESTS: |
|||
If there is an HTTP error while fetching an RSS object, the cached |
|||
version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off) |
|||
\*=======================================================================*/ |
|||
// Output encoding handed to the MagpieRSS parser by _response_to_rss(). |
|||
// Defined at include time, so init()'s 'ISO-8859-1' default never applies. |
|||
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8'); |
|||
// Left disabled: init() then defines MAGPIE_INPUT_ENCODING as null. |
|||
//define('MAGPIE_INPUT_ENCODING', 'UTF-8'); |
|||
// Defined at include time, so init()'s default of true never applies. |
|||
define('MAGPIE_DETECT_ENCODING', false); |
|||
// Version string embedded in the default User-Agent built by init(). |
|||
define('MAGPIE_VERSION', '0.72'); |
|||
|
|||
// Last error message; written by error() and magpie_error(), read by magpie_error(). |
|||
$MAGPIE_ERROR = ""; |
|||
|
|||
/**
 * Return a parsed RSS object for $url, maintaining the cache.
 *
 * Flow when MAGPIE_CACHE_ON is true:
 *   1. check the cache
 *   2. on a fresh HIT, return the cached object
 *   3. otherwise fetch the remote file; a STALE copy that carries both an
 *      ETag and a Last-Modified value turns the request into a conditional GET
 *   4. if the fetch (or the parse of the fetched document) fails, return the
 *      stale copy unless MAGPIE_CACHE_FRESH_ONLY is set; otherwise register
 *      an error and return false
 *
 * @param string $url  url of the RSS file
 * @return mixed       parsed RSS object (see rss_parse.inc), or false on failure
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // cache disabled: fetch the file and parse it, nothing else to fall back on
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }

        error("Failed to fetch $url and cache is off");
        return false;
    }

    // cache is ON
    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = 0;       // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // cached (possibly stale) RSS object
    $errormsg        = 0;       // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // Strict comparison: on PHP < 8 the initial integer 0 compares loosely
    // equal to any non-numeric string, so a broken cache looked like a HIT.
    // NOTE(review): assumes check_cache() returns the strings named above.
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get

    // setup headers
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss and $rss->etag and $rss->last_modified ) {
            $request_headers['If-None-Match'] = $rss->etag;
            // The validator header for Last-Modified is If-Modified-Since;
            // "If-Last-Modified" does not exist and servers ignore it.
            $request_headers['If-Modified-Since'] = $rss->last_modified;
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        // a 304 is only usable when there is a cached object to hand back
        if ( $resp->status == '304' and $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            // keep the stale copy in $rss so a parse failure can still fall
            // back to it; _response_to_rss() registers its own error
            $fresh = _response_to_rss( $resp );
            if ( $fresh ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $fresh );
                return $fresh;
            }
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habit of tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .= "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return the cached object, unless only fresh data is wanted
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end fetch_rss()
|||
|
|||
/*=======================================================================*\ |
|||
Function: error |
|||
Purpose: set MAGPIE_ERROR, and trigger error |
|||
\*=======================================================================*/ |
|||
|
|||
/**
 * Record an error in the global $MAGPIE_ERROR and raise it via trigger_error().
 *
 * An empty message is ignored: nothing is stored and nothing is triggered.
 *
 * @param string $errormsg  message text (stored with a "MagpieRSS: " prefix)
 * @param int    $lvl       error level passed to trigger_error()
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's own error message when one is visible in this scope
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    if ( !$errormsg ) {
        return;
    }

    $MAGPIE_ERROR = 'MagpieRSS: ' . $errormsg;
    trigger_error( $MAGPIE_ERROR, $lvl);
}
|||
|
|||
/**
 * Raise a debug message through trigger_error().
 *
 * @param string $debugmsg  message text (prefixed with "MagpieRSS [debug] ")
 * @param int    $lvl       error level passed to trigger_error()
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $message = 'MagpieRSS [debug] ' . $debugmsg;

    trigger_error($message, $lvl);
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: magpie_error |
|||
Purpose: accessor for the magpie error variable |
|||
\*=======================================================================*/ |
|||
/**
 * Accessor for the global $MAGPIE_ERROR.
 *
 * @param string $errormsg  when non-empty, replaces the stored message first
 * @return mixed            the current value of $MAGPIE_ERROR
 */
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // empty() is false exactly when the value is set and truthy
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: _fetch_remote_file |
|||
Purpose: retrieve an arbitrary remote file |
|||
Input: url of the remote file |
|||
headers to send along with the request (optional) |
|||
Output: an HTTP response object (see Snoopy.class.inc) |
|||
\*=======================================================================*/ |
|||
/**
 * Retrieve an arbitrary remote file with Snoopy.
 *
 * @param string $url      url of the remote file
 * @param mixed  $headers  array of request headers to send; any non-array
 *                         value (the default "") sends none
 * @return Snoopy          the client object after fetch(), used as the response
 */
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP
    $snoopy = new Snoopy();

    $snoopy->agent        = MAGPIE_USER_AGENT;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->use_gzip     = MAGPIE_USE_GZIP;

    if ( is_array($headers) ) {
        $snoopy->rawheaders = $headers;
    }

    // warnings raised while fetching are suppressed; callers inspect the
    // returned object (status, error) instead
    @$snoopy->fetch($url);

    return $snoopy;
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: _response_to_rss |
|||
Purpose: parse an HTTP response object into an RSS object |
|||
Input: an HTTP response object (see Snoopy) |
|||
Output: parsed RSS object (see rss_parse) |
|||
\*=======================================================================*/ |
|||
/**
 * Parse an HTTP response object into an RSS object.
 *
 * On success the response's ETag and Last-Modified header values are copied
 * onto the RSS object (as ->etag and ->last_modified) so that fetch_rss()
 * can issue a conditional GET next time.
 *
 * @param object $resp  HTTP response object (reads ->results and ->headers)
 * @return mixed        parsed MagpieRSS object, or false if parsing failed
 */
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parse failed: construct the error message and bail out
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);

        return false;
    }

    // find Etag, and Last-Modified
    $header_lines = is_array($resp->headers) ? $resp->headers : array();

    foreach ( $header_lines as $h ) {
        // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
        if (strpos($h, ": ")) {
            list($field, $val) = explode(": ", $h, 2);
        }
        else {
            $field = $h;
            $val = "";
        }

        // HTTP field names are case-insensitive, so compare in lower case.
        // Values are trimmed because a raw header line may still carry its
        // line terminator (presumably CRLF from the HTTP client; verify),
        // which would corrupt the header when the value is sent back.
        $field = strtolower(trim($field));
        $val   = trim($val);

        if ( $field == 'etag' ) {
            $rss->etag = $val;
        }
        elseif ( $field == 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: init |
|||
Purpose: setup constants with default values |
|||
check for user overrides |
|||
\*=======================================================================*/ |
|||
/**
 * Define every MAGPIE_* constant that the including script has not already
 * defined. Runs its body once; later calls return immediately because
 * MAGPIE_INITALIZED is then defined.
 */
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // constant name => default value, applied only where still undefined
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,  // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'ISO-8859-1',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,      // 5 second timeout
        'MAGPIE_USE_GZIP'         => true,   // use gzip encoding to fetch rss files if supported?
    );

    foreach ( $defaults as $name => $value ) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // the default user agent advertises whether caching is in use
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';

        define('MAGPIE_USER_AGENT',
            'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix);
    }
}
|||
|
|||
// NOTE: the following code should really be in Snoopy, or at least |
|||
// somewhere other then rss_fetch! |
|||
|
|||
/*=======================================================================*\ |
|||
HTTP STATUS CODE PREDICATES |
|||
These functions attempt to classify an HTTP status code |
|||
based on RFC 2616 and RFC 2518. |
|||
|
|||
All of them take an HTTP status code as input, and return true or false |
|||
|
|||
All this code is adapted from LWP's HTTP::Status. |
|||
\*=======================================================================*/ |
|||
|
|||
|
|||
/*=======================================================================*\ |
|||
Function: is_info |
|||
Purpose: return true if Informational status code |
|||
\*=======================================================================*/ |
|||
function is_info ($sc) {
    // Informational: 100 <= status < 200
    return (100 <= $sc) && (200 > $sc);
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: is_success |
|||
Purpose: return true if Successful status code |
|||
\*=======================================================================*/ |
|||
function is_success ($sc) {
    // Successful: 200 <= status < 300
    return (200 <= $sc) && (300 > $sc);
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: is_redirect |
|||
Purpose: return true if Redirection status code |
|||
\*=======================================================================*/ |
|||
function is_redirect ($sc) {
    // Redirection: 300 <= status < 400
    return (300 <= $sc) && (400 > $sc);
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: is_error |
|||
Purpose: return true if Error status code |
|||
\*=======================================================================*/ |
|||
function is_error ($sc) {
    // Any error, client or server: 400 <= status < 600
    return (400 <= $sc) && (600 > $sc);
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: is_client_error |
|||
Purpose: return true if Error status code, and its a client error |
|||
\*=======================================================================*/ |
|||
function is_client_error ($sc) {
    // Client error: 400 <= status < 500
    return (400 <= $sc) && (500 > $sc);
}
|||
|
|||
/*=======================================================================*\ |
|||
Function: is_server_error |
|||
Purpose: return true if Error status code, and its a server error |
|||
\*=======================================================================*/ |
|||
function is_server_error ($sc) {
    // Server error: 500 <= status < 600
    return (500 <= $sc) && (600 > $sc);
}
|||
|
|||
?> |
@ -1,605 +0,0 @@ |
|||
<?php |
|||
|
|||
/** |
|||
* Project: MagpieRSS: a simple RSS integration tool |
|||
* File: rss_parse.inc - parse an RSS or Atom feed |
|||
* return as a simple object. |
|||
* |
|||
* Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3 |
|||
* |
|||
* The latest version of MagpieRSS can be obtained from: |
|||
* http://magpierss.sourceforge.net |
|||
* |
|||
* For questions, help, comments, discussion, etc., please join the |
|||
* Magpie mailing list: |
|||
* magpierss-general@lists.sourceforge.net |
|||
* |
|||
* @author Kellan Elliott-McCrea <kellan@protest.net> |
|||
* @version 0.7a |
|||
* @license GPL |
|||
* |
|||
*/ |
|||
|
|||
define('RSS', 'RSS'); |
|||
define('ATOM', 'Atom'); |
|||
|
|||
require_once (MAGPIE_DIR . 'rss_utils.inc'); |
|||
|
|||
/** |
|||
* Hybrid parser, and object, takes RSS as a string and returns a simple object. |
|||
* |
|||
* see: rss_fetch.inc for a simpler interface with integrated caching support |
|||
* |
|||
*/ |
|||
class MagpieRSS { |
|||
var $parser; |
|||
|
|||
var $current_item = array(); // item currently being parsed |
|||
var $items = array(); // collection of parsed items |
|||
var $channel = array(); // hash of channel fields |
|||
var $textinput = array(); |
|||
var $image = array(); |
|||
var $feed_type; |
|||
var $feed_version; |
|||
var $encoding = ''; // output encoding of parsed rss |
|||
|
|||
var $_source_encoding = ''; // only set if we have to parse xml prolog |
|||
|
|||
var $ERROR = ""; |
|||
var $WARNING = ""; |
|||
|
|||
// define some constants |
|||
|
|||
var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright'); |
|||
var $_KNOWN_ENCODINGS = array('UTF-8', 'US-ASCII', 'ISO-8859-1'); |
|||
|
|||
// parser variables, useless if you're not a parser, treat as private |
|||
var $stack = array(); // parser stack |
|||
var $inchannel = false; |
|||
var $initem = false; |
|||
var $incontent = false; // if in Atom <content mode="xml"> field |
|||
var $intextinput = false; |
|||
var $inimage = false; |
|||
var $current_namespace = false; |
|||
|
|||
|
|||
/** |
|||
* Set up XML parser, parse source, and return populated RSS object.. |
|||
* |
|||
* @param string $source string containing the RSS to be parsed |
|||
* |
|||
* NOTE: Probably a good idea to leave the encoding options alone unless |
|||
* you know what you're doing as PHP's character set support is |
|||
* a little weird. |
|||
* |
|||
* NOTE: A lot of this is unnecessary but harmless with PHP5 |
|||
* |
|||
* |
|||
* @param string $output_encoding output the parsed RSS in this character |
|||
* set defaults to ISO-8859-1 as this is PHP's |
|||
* default. |
|||
* |
|||
* NOTE: might be changed to UTF-8 in future |
|||
* versions. |
|||
* |
|||
* @param string $input_encoding the character set of the incoming RSS source. |
|||
* Leave blank and Magpie will try to figure it |
|||
* out. |
|||
* |
|||
* |
|||
* @param bool $detect_encoding if false Magpie won't attempt to detect |
|||
* source encoding. (caveat emptor) |
|||
* |
|||
*/ |
|||
function __construct ($source, $output_encoding='ISO-8859-1',
                      $input_encoding=null, $detect_encoding=true)
{
    # if PHP xml isn't compiled in, give up: nothing below can work
    #
    if (!function_exists('xml_parser_create')) {
        $this->error( "Failed to load PHP's XML Extension. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        // without MAGPIE_DEBUG error() only logs, so stop explicitly
        return;
    }

    list($parser, $source) = $this->create_parser($source,
            $output_encoding, $input_encoding, $detect_encoding);

    // xml_parser_create() yields a resource before PHP 8 and an XMLParser
    // object from PHP 8 on; accept either
    if (!is_resource($parser) && !is_object($parser)) {
        $this->error( "Failed to create an instance of PHP's XML parser. " .
                      "http://www.php.net/manual/en/ref.xml.php",
                      E_USER_ERROR );
        return;
    }

    $this->parser = $parser;

    # pass in parser, and a reference to this object
    # setup handlers
    #
    xml_set_object( $this->parser, $this );
    xml_set_element_handler($this->parser,
            'feed_start_element', 'feed_end_element' );

    xml_set_character_data_handler( $this->parser, 'feed_cdata' );

    $status = xml_parse( $this->parser, $source );

    if (! $status ) {
        $errorcode = xml_get_error_code( $this->parser );
        if ( $errorcode != XML_ERROR_NONE ) {
            $xml_error  = xml_error_string( $errorcode );
            $error_line = xml_get_current_line_number($this->parser);
            $error_col  = xml_get_current_column_number($this->parser);
            $errormsg   = "$xml_error at line $error_line, column $error_col";

            $this->error( $errormsg );
        }
    }

    xml_parser_free( $this->parser );

    // copy fields between their RSS and Atom names
    $this->normalize();
}

/**
 * PHP4-style constructor, kept so that PHP 4 and any explicit
 * parent::MagpieRSS() / $obj->MagpieRSS() calls keep working.
 * Takes the same arguments as __construct() and simply forwards them.
 */
function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                    $input_encoding=null, $detect_encoding=true)
{
    $this->__construct($source, $output_encoding, $input_encoding, $detect_encoding);
}
|||
|
|||
/**
 * XML parser callback for an opening tag.
 *
 * The first element seen identifies the feed type and version. After that
 * the element either flips one of the in* state flags, is flattened into
 * the current Atom content construct, is recorded as an Atom link, or is
 * pushed onto the element stack that feed_cdata() uses to name fields.
 *
 * @param mixed  $p        the XML parser (unused)
 * @param string $element  tag name as reported by the parser
 * @param array  $attrs    tag attributes; keys are lower-cased in place
 */
function feed_start_element($p, $element, &$attrs) {
    $el = $element = strtolower($element);
    $attrs = array_change_key_case($attrs, CASE_LOWER);

    // check for a namespace, and split if found
    // (explode() replaces split(), which was removed in PHP 7)
    $ns = false;
    if ( strpos( $element, ':' ) ) {
        list($ns, $el) = explode( ':', $element, 2);
    }
    if ( $ns and $ns != 'rdf' ) {
        $this->current_namespace = $ns;
    }

    # if feed type isn't set, then this is first element of feed
    # identify feed from root element
    #
    if (!isset($this->feed_type) ) {
        // a root element without a version attribute leaves the version null
        $version = isset($attrs['version']) ? $attrs['version'] : null;

        if ( $el == 'rdf' ) {
            $this->feed_type = RSS;
            $this->feed_version = '1.0';
        }
        elseif ( $el == 'rss' ) {
            $this->feed_type = RSS;
            $this->feed_version = $version;
        }
        elseif ( $el == 'feed' ) {
            $this->feed_type = ATOM;
            $this->feed_version = $version;
            $this->inchannel = true;
        }
        return;
    }

    if ( $el == 'channel' )
    {
        $this->inchannel = true;
    }
    elseif ($el == 'item' or $el == 'entry' )
    {
        $this->initem = true;
        if ( isset($attrs['rdf:about']) ) {
            $this->current_item['about'] = $attrs['rdf:about'];
        }
    }

    // if we're in the default namespace of an RSS feed,
    // record textinput or image fields
    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'textinput' )
    {
        $this->intextinput = true;
    }

    elseif (
        $this->feed_type == RSS and
        $this->current_namespace == '' and
        $el == 'image' )
    {
        $this->inimage = true;
    }

    # handle atom content constructs
    elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
    {
        // avoid clashing w/ RSS mod_content
        if ($el == 'content' ) {
            $el = 'atom_content';
        }

        $this->incontent = $el;
    }

    // if inside an Atom content construct (e.g. content or summary) field treat tags as text
    elseif ($this->feed_type == ATOM and $this->incontent )
    {
        // if tags are inlined, then flatten
        $attrs_str = join(' ',
                array_map('map_attrs',
                array_keys($attrs),
                array_values($attrs) ) );

        $this->append_content( "<$element $attrs_str>" );

        array_unshift( $this->stack, $el );
    }

    // Atom supports many links per containing element.
    // Magpie treats link elements of type rel='alternate'
    // as being equivalent to RSS's simple link element.
    //
    elseif ($this->feed_type == ATOM and $el == 'link' )
    {
        // missing attributes are read as empty strings, which yields the
        // same field names and values as before without undefined-index
        // notices
        $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
        $href = isset($attrs['href']) ? $attrs['href'] : '';

        if ( $rel == 'alternate' )
        {
            $link_el = 'link';
        }
        else {
            $link_el = 'link_' . $rel;
        }

        $this->append($link_el, $href);
    }
    // set stack[0] to current element
    else {
        array_unshift($this->stack, $el);
    }
}
|||
|
|||
|
|||
|
|||
/**
 * XML parser callback for character data.
 *
 * Inside an Atom content construct the text is appended to that construct;
 * otherwise it is appended to the field named by the element stack
 * (outermost element first, joined with '_').
 *
 * @param mixed  $p     the XML parser (unused)
 * @param string $text  character data
 */
function feed_cdata ($p, $text) {
    $in_atom_content = ($this->feed_type == ATOM and $this->incontent);

    if ( $in_atom_content ) {
        $this->append_content( $text );
        return;
    }

    $field_name = join('_', array_reverse($this->stack));
    $this->append($field_name, $text);
}
|||
|
|||
/**
 * XML parser callback for a closing tag.
 *
 * Finishes the current item, clears the matching in* state flag, or pops
 * the element stack (emitting closing markup first when inside an Atom
 * content construct). The current namespace is always reset afterwards.
 *
 * @param mixed  $p   the XML parser (unused)
 * @param string $el  tag name as reported by the parser
 */
function feed_end_element ($p, $el) {
    $el = strtolower($el);

    $is_rss     = ($this->feed_type == RSS);
    $is_atom    = ($this->feed_type == ATOM);
    $default_ns = ($this->current_namespace == '');

    if ( $el == 'item' or $el == 'entry' ) {
        // item complete: store it and start a fresh one
        $this->items[] = $this->current_item;
        $this->current_item = array();
        $this->initem = false;
    }
    elseif ( $is_rss and $default_ns and $el == 'textinput' ) {
        $this->intextinput = false;
    }
    elseif ( $is_rss and $default_ns and $el == 'image' ) {
        $this->inimage = false;
    }
    elseif ( $is_atom and in_array($el, $this->_CONTENT_CONSTRUCTS) ) {
        $this->incontent = false;
    }
    elseif ( $el == 'channel' or $el == 'feed' ) {
        $this->inchannel = false;
    }
    else {
        if ( $is_atom and $this->incontent ) {
            // balance tags properly
            // note: i don't think this is actually neccessary
            $markup = ( $this->stack[0] == $el ) ? "</$el>" : "<$el />";
            $this->append_content($markup);
        }

        array_shift( $this->stack );
    }

    $this->current_namespace = false;
}
|||
|
|||
/**
 * Append $str2 to $str1 in place, treating an unset (or null) $str1 as "".
 *
 * @param mixed  $str1  variable to append to (by reference)
 * @param string $str2  text to append
 */
function concat (&$str1, $str2="") {
    $str1 = (isset($str1) ? $str1 : "") . $str2;
}
|||
|
|||
|
|||
|
|||
/**
 * Append text to the Atom content construct currently being read
 * ($this->incontent), on the current item if inside one, else on the channel.
 *
 * @param string $text  text or flattened markup to append
 */
function append_content($text) {
    $field = $this->incontent;

    if ( $this->initem ) {
        $this->concat( $this->current_item[ $field ], $text );
        return;
    }

    if ( $this->inchannel ) {
        $this->concat( $this->channel[ $field ], $text );
    }
}
|||
|
|||
// smart append - field and namespace aware |
|||
/**
 * Smart append - field and namespace aware.
 *
 * Appends $text to field $el of whichever container is currently open.
 * With a current namespace the field lives in a sub-array keyed by that
 * namespace. Nothing happens for an empty $el or when no container is open.
 *
 * @param string $el    field name
 * @param string $text  text to append
 */
function append($el, $text) {
    if (!$el) {
        return;
    }

    $ns = $this->current_namespace;

    if ( !$ns ) {
        // default namespace - precedence: item, textinput, image, channel
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $el ], $text );
        }
        elseif ( $this->intextinput ) {
            $this->concat( $this->textinput[ $el ], $text );
        }
        elseif ( $this->inimage ) {
            $this->concat( $this->image[ $el ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $el ], $text );
        }
        return;
    }

    // namespaced field - precedence: item, channel, textinput, image
    if ( $this->initem ) {
        $this->concat( $this->current_item[ $ns ][ $el ], $text );
    }
    elseif ( $this->inchannel ) {
        $this->concat( $this->channel[ $ns ][ $el ], $text );
    }
    elseif ( $this->intextinput ) {
        $this->concat( $this->textinput[ $ns ][ $el ], $text );
    }
    elseif ( $this->inimage ) {
        $this->concat( $this->image[ $ns ][ $el ], $text );
    }
}
|||
|
|||
/**
 * Copy fields between their RSS and Atom names so callers can use either
 * vocabulary, and add a 'date_timestamp' (unix epoch) to items whose date
 * can be parsed.
 *
 * Atom: tagline -> description, summary -> description,
 *       atom_content -> content/encoded, date from issued or modified
 *       (falling back to the Atom 1.0 names published / updated).
 * RSS:  description -> tagline, description -> summary,
 *       content/encoded -> atom_content, date from dc:date (RSS 1.0)
 *       or pubdate.
 */
function normalize () {
    $item_count = count($this->items);

    // if atom populate rss fields
    if ( $this->is_atom() ) {
        // a missing source field copies as null, without an undefined-index notice
        $this->channel['description'] =
            isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];
            if ( isset($item['summary']) )
                $item['description'] = $item['summary'];
            if ( isset($item['atom_content']))
                $item['content']['encoded'] = $item['atom_content'];

            // first date field present wins; issued/modified keep priority
            $atom_date = null;
            foreach ( array('issued', 'modified', 'published', 'updated') as $date_field ) {
                if ( isset($item[$date_field]) ) {
                    $atom_date = $item[$date_field];
                    break;
                }
            }

            if ( $atom_date ) {
                $epoch = @parse_w3cdtf($atom_date);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
    elseif ( $this->is_rss() ) {
        $this->channel['tagline'] =
            isset($this->channel['description']) ? $this->channel['description'] : null;

        for ( $i = 0; $i < $item_count; $i++) {
            $item = $this->items[$i];
            if ( isset($item['description']))
                $item['summary'] = $item['description'];
            if ( isset($item['content']['encoded'] ) )
                $item['atom_content'] = $item['content']['encoded'];

            if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                $epoch = @parse_w3cdtf($item['dc']['date']);
                if ($epoch and $epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }
            elseif ( isset($item['pubdate']) ) {
                $epoch = @strtotime($item['pubdate']);
                if ($epoch > 0) {
                    $item['date_timestamp'] = $epoch;
                }
            }

            $this->items[$i] = $item;
        }
    }
}
|||
|
|||
|
|||
/**
 * @return mixed  the feed version if this is an RSS feed, false otherwise
 */
function is_rss () {
    return ( $this->feed_type == RSS ) ? $this->feed_version : false;
}
|||
|
|||
/**
 * @return mixed  the feed version if this is an Atom feed, false otherwise
 */
function is_atom() {
    return ( $this->feed_type == ATOM ) ? $this->feed_version : false;
}
|||
|
|||
/** |
|||
* return XML parser, and possibly re-encoded source |
|||
* |
|||
*/ |
|||
/**
 * Create the XML parser for $source and set its output encoding.
 *
 * @param string $source   the feed document
 * @param string $out_enc  target encoding; when non-empty it is stored in
 *                         $this->encoding and set on the parser
 * @param string $in_enc   source encoding, or empty to leave it to the parser
 * @param bool   $detect   whether source-encoding detection is wanted
 * @return array           array(parser, possibly re-encoded source)
 */
function create_parser($source, $out_enc, $in_enc, $detect) {
    // Compare whole versions: testing only the first character of
    // phpversion() for "5" sent PHP 7 and later down the PHP4 path.
    if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
        $parser = $this->php5_create_parser($in_enc, $detect);
    }
    else {
        list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
    }

    if ($out_enc) {
        $this->encoding = $out_enc;
        xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
    }

    return array($parser, $source);
}
|||
|
|||
/** |
|||
* Instantiate an XML parser under PHP5 |
|||
* |
|||
* PHP5 will do a fine job of detecting input encoding |
|||
* if passed an empty string as the encoding. |
|||
* |
|||
* All hail libxml2! |
|||
* |
|||
*/ |
|||
function php5_create_parser($in_enc, $detect) {
    // by default php5 does a fine job of detecting input encodings, so an
    // explicit encoding is only passed when detection is off and one was given
    $encoding = ( !$detect && $in_enc ) ? $in_enc : '';

    return xml_parser_create($encoding);
}
|||
|
|||
/** |
|||
* Instaniate an XML parser under PHP4 |
|||
* |
|||
* Unfortunately PHP4's support for character encodings |
|||
* and especially XML and character encodings sucks. As |
|||
* long as the documents you parse only contain characters |
|||
* from the ISO-8859-1 character set (a superset of ASCII, |
|||
* and a subset of UTF-8) you're fine. However once you |
|||
* step out of that comfy little world things get mad, bad, |
|||
* and dangerous to know. |
|||
* |
|||
* The following code is based on SJM's work with FoF |
|||
* @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss |
|||
* |
|||
*/ |
|||
function php4_create_parser($source, $in_enc, $detect) {
    if ( !$detect ) {
        return array(xml_parser_create($in_enc), $source);
    }

    if (!$in_enc) {
        // Read the encoding from the XML prolog. The "?" of "<?xml" must be
        // escaped: unescaped it made the "<" optional instead of matching a
        // literal question mark, so a real prolog never matched.
        if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?>/m', $source, $m)) {
            $in_enc = strtoupper($m[1]);
            // _source_encoding is the property the class declares; the
            // undeclared source_encoding is still written for any code
            // that read the old name
            $this->_source_encoding = $in_enc;
            $this->source_encoding  = $in_enc;
        }
        else {
            $in_enc = 'UTF-8';
        }
    }

    if ($this->known_encoding($in_enc)) {
        return array(xml_parser_create($in_enc), $source);
    }

    // the detected encoding is not one of the simple encodings PHP knows

    // attempt to use the iconv extension to
    // cast the XML to a known encoding
    // @see http://php.net/iconv
    if (function_exists('iconv')) {
        $encoded_source = iconv($in_enc,'UTF-8', $source);
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // iconv didn't work, try mb_convert_encoding
    // @see http://php.net/mbstring
    if(function_exists('mb_convert_encoding')) {
        $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
        if ($encoded_source) {
            return array(xml_parser_create('UTF-8'), $encoded_source);
        }
    }

    // else: no conversion available, parse the source as it is
    $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                 "You may see strange artifacts, and mangled characters.",
                 E_USER_NOTICE);

    return array(xml_parser_create(), $source);
}
|||
|
|||
/**
 * Check an encoding name against $this->_KNOWN_ENCODINGS, ignoring case.
 *
 * @param string $enc  encoding name
 * @return mixed       the upper-cased name if it is listed, false otherwise
 */
function known_encoding($enc) {
    $upper = strtoupper($enc);

    return in_array($upper, $this->_KNOWN_ENCODINGS) ? $upper : false;
}
|||
|
|||
/**
 * Report a problem and record it on the object.
 *
 * With MAGPIE_DEBUG on the message is raised through trigger_error() at
 * level $lvl; otherwise it goes to error_log(). Notice-level messages are
 * then stored in $this->WARNING, everything else in $this->ERROR.
 *
 * @param string $errormsg  message text
 * @param int    $lvl       error level
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    // append PHP's own error message when one is visible in this scope
    if ( isset($php_errormsg) ) {
        $errormsg = $errormsg . " ($php_errormsg)";
    }

    if ( !MAGPIE_DEBUG ) {
        error_log( $errormsg, 0);
    }
    else {
        trigger_error( $errormsg, $lvl);
    }

    $is_notice = $lvl & (E_USER_NOTICE|E_NOTICE);

    if ( $is_notice ) {
        $this->WARNING = $errormsg;
        return;
    }

    $this->ERROR = $errormsg;
}
|||
|
|||
|
|||
} // end class RSS |
|||
|
|||
/**
 * Format one attribute as name="value" (the value is not escaped).
 *
 * @param string $k  attribute name
 * @param string $v  attribute value
 * @return string
 */
function map_attrs($k, $v) {
    return $k . '="' . $v . '"';
}
|||
|
|||
// patch to support medieval versions of PHP4.1.x, |
|||
// courtesy, Ryan Currie, ryan@digibliss.com |
|||
|
|||
if (!function_exists('array_change_key_case')) {
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }

    /**
     * Fallback for PHP versions without array_change_key_case().
     *
     * @param array $array  input array
     * @param int   $case   CASE_LOWER (default) or CASE_UPPER
     * @return array        copy of $array with every key passed through
     *                      strtolower() or strtoupper(); empty input gives
     *                      an empty array
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        // compare, don't assign: "$case=CASE_LOWER" always evaluated to 0,
        // so the old code fell through and upper-cased every key
        $cmd = ($case == CASE_UPPER) ? 'strtoupper' : 'strtolower';

        $output = array();
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }
}
|||
|
|||
?> |
@ -1,67 +0,0 @@ |
|||
<?php |
|||
/* |
|||
* Project: MagpieRSS: a simple RSS integration tool |
|||
* File: rss_utils.inc, utility methods for working with RSS |
|||
* Author: Kellan Elliott-McCrea <kellan@protest.net> |
|||
* Version: 0.51 |
|||
* License: GPL |
|||
* |
|||
* The latest version of MagpieRSS can be obtained from: |
|||
* http://magpierss.sourceforge.net |
|||
* |
|||
* For questions, help, comments, discussion, etc., please join the |
|||
* Magpie mailing list: |
|||
* magpierss-general@lists.sourceforge.net |
|||
*/ |
|||
|
|||
|
|||
/*======================================================================*\ |
|||
Function: parse_w3cdtf |
|||
Purpose: parse a W3CDTF date into unix epoch |
|||
|
|||
NOTE: http://www.w3.org/TR/NOTE-datetime |
|||
\*======================================================================*/ |
|||
|
|||
/**
 * Parse a W3CDTF date-time (e.g. "2003-12-13T18:30:02+01:00") into a unix epoch.
 *
 * Seconds and the timezone designator are optional. A value without a
 * timezone, or with "Z", is treated as GMT. A "+hh:mm" offset is subtracted
 * and a "-hh:mm" offset is added to obtain GMT.
 *
 * @param string $date_str  text containing a W3CDTF date-time
 * @return int              seconds since the unix epoch, or -1 when
 *                          $date_str is not a string or holds no match
 */
function parse_w3cdtf ( $date_str ) {

    if ( ! is_string($date_str) ) {
        return -1;
    }

    # regex to match w3cdtf; capture groups are
    #   1 year, 2 month, 3 day, 4 hours, 5 minutes,
    #   6 ":ss" (with the colon), 7 seconds,
    #   8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z"
    $pat = "/(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2})(:(\d{2}))?(?:([-+])(\d{2}):?(\d{2})|(Z))?/";

    if ( ! preg_match( $pat, $date_str, $match ) ) {
        return -1;
    }

    # seconds live in group 7 (group 6 still carries the leading colon);
    # optional groups are missing or empty when they did not take part
    $seconds = ( isset($match[7]) && $match[7] !== '' ) ? (int) $match[7] : 0;

    # calc epoch for current date assuming GMT
    $epoch = gmmktime(
        (int) $match[4], (int) $match[5], $seconds,
        (int) $match[2], (int) $match[3], (int) $match[1]
    );

    # zulu time ("Z", group 11) or no designator at all: nothing to adjust
    $offset = 0;

    if ( isset($match[8]) && $match[8] !== '' ) {
        $offset = ( ((int) $match[9] * 60) + (int) $match[10] ) * 60;

        # is timezone ahead of GMT? then subtract offset
        if ( $match[8] == '+' ) {
            $offset = $offset * -1;
        }
    }

    return $epoch + $offset;
}
|||
|
|||
?> |
@ -1,27 +0,0 @@ |
|||
Some examples of how to use Magpie: |
|||
|
|||
* magpie_simple.php * |
|||
Simple example of fetching and parsing an RSS file. Expects to be |
|||
called with a query param 'url=http://<some rss file>' |
|||
|
|||
* simple_smarty.php * |
|||
Similar to magpie_simple, but using the Smarty template engine to do |
|||
display. Also demonstrates using rss_utils.inc and a smarty plugin to |
|||
parse and display when each RSS item was published. |
|||
|
|||
* magpie_debug.php * |
|||
Displays all the information available from a parsed feed. |
|||
|
|||
* smarty_plugin/modifier.rss_date_parse.php * |
|||
|
|||
A Smarty plugin for parsing RSS style dates. You must include rss_utils.inc |
|||
for this plugin to work. It also must be installed in the Smarty plugin |
|||
directory, see the Smarty docs for details. |
|||
|
|||
* templates/simple.smarty |
|||
A Smarty template used by simple_smarty.php which demonstrates |
|||
displaying an RSS feed and using the date parse plugin. |
|||
|
|||
|
|||
The Smarty template engine and documentation on how to use it are available from |
|||
http://smarty.php.net |
@ -1,80 +0,0 @@ |
|||
<?php |
|||
|
|||
// Debug-friendly settings: show every PHP error, emit UTF-8 and switch
// Magpie's own debug output on.
ini_set('display_errors', 1);
ini_set('error_reporting', E_ALL);
define('MAGPIE_OUTPUT_ENCODING', 'UTF-8');
define('MAGPIE_DIR', '../');
define('MAGPIE_DEBUG', 1);

// flush cache quickly for debugging purposes, don't do this on a live site
define('MAGPIE_CACHE_AGE', 10);

require_once(MAGPIE_DIR.'rss_fetch.inc');

// Take the feed URL from the query string; anything that is not a plain
// string (e.g. url[]=...) falls back to the bundled test feed.
if ( isset($_GET['url']) && is_string($_GET['url']) ) {
    $url = $_GET['url'];
}
else {
    $url = 'http://magpierss.sf.net/test.rss';
}

test_library_support();

$rss = fetch_rss( $url );

if ($rss) {
    // Everything below comes from a remote feed, so it is HTML-escaped
    // before being written into the page.
    $channel_title = isset($rss->channel['title']) ? $rss->channel['title'] : '';

    echo "<h3>Example Output</h3>";
    echo "Channel: " . htmlspecialchars($channel_title, ENT_QUOTES, MAGPIE_OUTPUT_ENCODING) . "<p>";
    echo "<ul>";
    foreach ($rss->items as $item) {
        $href  = htmlspecialchars(isset($item['link'])  ? $item['link']  : '', ENT_QUOTES, MAGPIE_OUTPUT_ENCODING);
        $title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES, MAGPIE_OUTPUT_ENCODING);
        // the attribute is quoted so a link containing spaces stays intact
        echo "<li><a href=\"$href\">$title</a></li>";
    }
    echo "</ul>";
}
else {
    echo "Error: " . htmlspecialchars((string) magpie_error(), ENT_QUOTES, MAGPIE_OUTPUT_ENCODING);
}
|||
?>
|
|||
|
|||
<form>
<?php /* $url may come straight from the query string: escape it for the attribute */ ?>
RSS URL: <input type="text" size="30" name="url" value="<?php echo htmlspecialchars((string) $url, ENT_QUOTES, MAGPIE_OUTPUT_ENCODING) ?>"><br />
<input type="submit" value="Parse RSS">
</form>
|||
|
|||
<h3>Parsed Results (var_dump'ed)</h3> |
|||
<pre> |
|||
<?php var_dump($rss); ?>
|
|||
</pre> |
|||
|
|||
<?php |
|||
|
|||
/**
 * Print an HTML report on the PHP extensions this script checks for.
 *
 * A missing XML parser is reported as an error and terminates the script
 * via exit. Missing zlib or iconv/mbstring support only produces a warning
 * line.
 */
function test_library_support() {
    if (!function_exists('xml_parser_create')) {
        echo "<b>Error:</b> PHP compiled without XML support (--with-xml), Magpie won't work without PHP support for XML.<br />\n";
        exit;
    }
    else {
        echo "<b>OK:</b> Found an XML parser. <br />\n";
    }

    if ( ! function_exists('gzinflate') ) {
        echo "<b>Warning:</b> PHP compiled without Zlib support (--with-zlib). No support for GZIP encoding.<br />\n";
    }
    else {
        echo "<b>OK:</b> Support for GZIP encoding.<br />\n";
    }

    // NOTE(review): this warns when EITHER extension is missing, while the
    // message reads as if neither were available - confirm which is intended.
    if ( ! (function_exists('iconv') and function_exists('mb_convert_encoding') ) ) {
        // the two sentences are separate string pieces; keep the space between them
        echo "<b>Warning:</b> No support for iconv (--with-iconv) or multi-byte strings (--enable-mbstring). " .
             "No support for character set munging.<br />\n";
    }
    else {
        echo "<b>OK:</b> Support for character munging.<br />\n";
    }
}
|||
|
|||
?>
|
@ -1,29 +0,0 @@ |
|||
<?php |
|||
|
|||
define('MAGPIE_DIR', '../');
require_once(MAGPIE_DIR.'rss_fetch.inc');

// Feed URL from the query string; empty when absent or not a plain string,
// which avoids an undefined-index notice on the first page load.
$url = ( isset($_GET['url']) && is_string($_GET['url']) ) ? $_GET['url'] : '';

if ( $url ) {
    $rss = fetch_rss( $url );

    if ( $rss ) {
        // Feed content is untrusted: escape it before echoing. A single-byte
        // charset is named so htmlspecialchars() never rejects the input as
        // malformed; only the ASCII characters & < > " ' are rewritten.
        $channel_title = isset($rss->channel['title']) ? $rss->channel['title'] : '';

        echo "Channel: " . htmlspecialchars($channel_title, ENT_QUOTES, 'ISO-8859-1') . "<p>";
        echo "<ul>";
        foreach ($rss->items as $item) {
            $href  = htmlspecialchars(isset($item['link'])  ? $item['link']  : '', ENT_QUOTES, 'ISO-8859-1');
            $title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES, 'ISO-8859-1');
            echo "<li><a href=\"$href\">$title</a></li>";
        }
        echo "</ul>";
    }
    else {
        // the fetch failed, so there is no channel or item list to read
        echo "Error: " . htmlspecialchars((string) magpie_error(), ENT_QUOTES, 'ISO-8859-1');
    }
}
|||
?>
|
|||
|
|||
<form>
<?php /* $url comes from the query string: escape it before placing it in the attribute */ ?>
RSS URL: <input type="text" size="30" name="url" value="<?php echo htmlspecialchars((string) $url, ENT_QUOTES, 'ISO-8859-1') ?>"><br />
<input type="submit" value="Parse RSS">
</form>
|||
|
|||
<p> |
|||
<h2>Security Note:</h2> |
|||
This is a simple <b>example</b> script. If this was a <b>real</b> script we probably wouldn't allow strangers to submit random URLs, and we certainly wouldn't simply echo anything passed in the URL. Additionally, it's a bad idea to leave this example script lying around. |
|||
</p> |
@ -1,66 +0,0 @@ |
|||
<?php |
|||
|
|||
define('MAGPIE_DIR', '../');
require_once(MAGPIE_DIR.'rss_fetch.inc');

// Feed URL from the query string; empty when absent or not a plain string,
// which avoids an undefined-index notice on the first page load.
$url = ( isset($_GET['rss_url']) && is_string($_GET['rss_url']) ) ? $_GET['rss_url'] : '';
|||
|
|||
?>
|
|||
|
|||
<html> |
|||
<body LINK="#999999" VLINK="#000000"> |
|||
|
|||
<form>
<?php /* $url comes from the query string: escape it before placing it in the attribute */ ?>
<input type="text" name="rss_url" size="40" value="<?php echo htmlspecialchars((string) $url, ENT_QUOTES, 'ISO-8859-1') ?>"><input type="Submit">
</form>
|||
|
|||
<?php |
|||
|
|||
// Defined up front so the dump below also works when no URL was submitted.
$rss = null;

if ( $url ) {
    // $url is user input: escape it before echoing it back. A single-byte
    // charset is named so htmlspecialchars() never rejects the input as
    // malformed; only the ASCII characters & < > " ' are rewritten.
    echo "displaying: " . htmlspecialchars((string) $url, ENT_QUOTES, 'ISO-8859-1') . "<p>";
    $rss = fetch_rss( $url );

    if ( $rss ) {
        // slashbox() echoes its own markup and returns nothing
        slashbox ($rss);
    }
    else {
        echo "Error: " . htmlspecialchars((string) magpie_error(), ENT_QUOTES, 'ISO-8859-1');
    }
}

// The dump contains raw feed text, so it is escaped as well.
echo "<pre>";
echo htmlspecialchars(print_r($rss, true), ENT_QUOTES, 'ISO-8859-1');
echo "</pre>";
|||
?>
|
|||
|
|||
</body> |
|||
</html> |
|||
|
|||
<?php |
|||
|
|||
# just some quick and ugly php to generate html
|
|||
#
|
|||
#
|
|||
/**
 * Echo a small HTML table: the channel title as a linked header row,
 * followed by one linked row per feed item.
 *
 * All feed-supplied text is HTML-escaped and every href is quoted, because
 * the values printed here come from the feed object passed in.
 *
 * @param object $rss  object exposing ->channel (array with 'title' and
 *                     'link') and ->items (list of arrays with 'link' and
 *                     'title')
 * @return void        output is echoed, nothing is returned
 */
function slashbox ($rss) {
    echo "<table cellpadding=2 cellspacing=0><tr>";
    echo "<td bgcolor=#006666>";

    # get the channel title and link properties off of the rss object;
    # a single-byte charset is named so htmlspecialchars() never rejects
    # the text as malformed - only & < > " ' are rewritten
    $title = htmlspecialchars(isset($rss->channel['title']) ? $rss->channel['title'] : '', ENT_QUOTES, 'ISO-8859-1');
    $link  = htmlspecialchars(isset($rss->channel['link'])  ? $rss->channel['link']  : '', ENT_QUOTES, 'ISO-8859-1');

    echo "<a href=\"$link\"><font color=#FFFFFF><b>$title</b></font></a>";
    echo "</td></tr>";

    # foreach over each item in the array.
    # displaying simple links
    #
    # we could be doing all sorts of neat things with the dublin core
    # info, or the event info, or what not, but keeping it simple for now.
    foreach ($rss->items as $item ) {
        $item_link  = htmlspecialchars(isset($item['link'])  ? $item['link']  : '', ENT_QUOTES, 'ISO-8859-1');
        $item_title = htmlspecialchars(isset($item['title']) ? $item['title'] : '', ENT_QUOTES, 'ISO-8859-1');

        echo "<tr><td bgcolor=#cccccc>";
        echo "<a href=\"$item_link\">";
        echo $item_title;
        echo "</a></td></tr>";
    }

    echo "</table>";
}
|||
|
|||
?>
|
@ -1,58 +0,0 @@ |
|||
<?php |
|||
|
|||
// Define path to Smarty files (don't forget trailing slash)
|
|||
// and load library. (you'll want to change this value)
|
|||
//
|
|||
// NOTE: you can also simply add Smarty to your include path
|
|||
define('SMARTY_DIR', '/home/kellan/projs/magpierss/scripts/Smarty/');
require_once(SMARTY_DIR.'Smarty.class.php');

// define path to Magpie files and load library
// (you'll want to change this value)
//
// NOTE: you can also simply add MagpieRSS to your include path
define('MAGPIE_DIR', '/home/kellan/projs/magpierss/');
require_once(MAGPIE_DIR.'rss_fetch.inc');
require_once(MAGPIE_DIR.'rss_utils.inc');

// optionally show lots of debugging info
# define('MAGPIE_DEBUG', 2);

// optionally flush cache quickly for debugging purposes,
// don't do this on a live site
# define('MAGPIE_CACHE_AGE', 10);

// use cache? default is yes. see rss_fetch for other Magpie options
# define('MAGPIE_CACHE_ON', 1)

// setup template object
$smarty = new Smarty;
$smarty->compile_check = true;

// url of an rss file; empty when the parameter is absent or not a plain
// string, which avoids an undefined-index notice on the first page load
$url = ( isset($_GET['rss_url']) && is_string($_GET['rss_url']) ) ? $_GET['rss_url'] : '';

if ( $url ) {
    // assign a variable to smarty for use in the template
    $smarty->assign('rss_url', $url);

    // use MagpieRSS to fetch remote RSS file, and parse it
    $rss = fetch_rss( $url );

    // if fetch_rss returned false, we encountered an error
    if ( !$rss ) {
        $smarty->assign( 'error', magpie_error() );
    }
    $smarty->assign('rss', $rss );

    // Only read the first item when the fetch succeeded and that item
    // carries a dc:date; otherwise keep the "could not parse" value -1.
    $date = -1;
    if ( $rss && isset($rss->items[0]['dc']['date']) ) {
        $date = parse_w3cdtf( $rss->items[0]['dc']['date'] );
    }
    $smarty->assign( 'date', $date );
}

// parse smarty template, and display using the variables we assigned
$smarty->display('simple.smarty');
|||
|
|||
?>
|
@ -1,31 +0,0 @@ |
|||
<?php |
|||
|
|||
/* |
|||
* Smarty plugin |
|||
* ------------------------------------------------------------- |
|||
* Type: modifier |
|||
* Name: rss_date_parse |
|||
* Purpose: parse rss date into unix epoch |
|||
* Input: string: rss date |
|||
* default_date: default date if $rss_date is empty |
|||
* |
|||
* NOTE!!! parse_w3cdtf provided by MagpieRSS's rss_utils.inc |
|||
* this file needs to be included somewhere in your script |
|||
* ------------------------------------------------------------- |
|||
*/ |
|||
|
|||
/**
 * Smarty modifier: turn an RSS date string into a unix epoch.
 *
 * Delegates to parse_w3cdtf(), which must already be loaded.
 *
 * @param string      $rss_date      date text taken from the feed
 * @param string|null $default_date  date text used when $rss_date is empty
 * @return mixed  whatever parse_w3cdtf() returns for the chosen text, or
 *                null when both arguments are empty
 */
function smarty_modifier_rss_date_parse ($rss_date, $default_date=null)
{
    $has_default = isset($default_date) && $default_date != '';

    // nothing usable in either argument
    if ($rss_date == '' && !$has_default) {
        return;
    }

    // the feed's own date wins; the default is only a fallback
    $chosen = ($rss_date == '') ? $default_date : $rss_date;

    return parse_w3cdtf( $chosen );
}
|||
|
|||
|
|||
|
|||
|
|||
?>
|
@ -1,46 +0,0 @@ |
|||
<html> |
|||
<head> |
|||
<title>A Simple RSS Box: I'm not a designer</title> |
|||
</head> |
|||
<body> |
|||
|
|||
<form> |
|||
<b>RSS File:</b> |
|||
<input type="text" name="rss_url" value="{$rss_url}" size="50"> |
|||
<input type="submit"> |
|||
</form> |
|||
|
|||
<b>Displaying:</b> {$rss_url} |
|||
<p> |
|||
|
|||
{* if $error display the error |
|||
elseif parsed RSS object display the RSS |
|||
else solicit user for a URL |
|||
*} |
|||
|
|||
{if $error } |
|||
<b>Error:</b> {$error} |
|||
{elseif $rss} |
|||
<table border=1> |
|||
<tr> |
|||
<th colspan=2> |
|||
<a href="{$rss->channel.link}">{$rss->channel.title}</a> |
|||
</th> |
|||
</tr> |
|||
{foreach from=$rss->items item=item} |
|||
<tr> |
|||
<td> |
|||
<a href="{$item.link}">{$item.title}</a> |
|||
</td> |
|||
<td> |
|||
{$item.dc.date|rss_date_parse|date_format:"%A, %B %e, %Y"} |
|||
</td> |
|||
</tr> |
|||
{/foreach} |
|||
</table> |
|||
{else} |
|||
Enter the URL of an RSS file to display. |
|||
{/if} |
|||
|
|||
</body> |
|||
</html> |
Write
Preview
Loading…
Cancel
Save
Reference in new issue