Browse Source
Removed examples which don't actually work anymore for PHP 4, and
updated those that do for PHP 5.
PHP-5.1
5 changed files with 5 additions and 160 deletions
-
4ext/tidy/examples/cleanhtml5.php
-
93ext/tidy/examples/dumpit.php
-
4ext/tidy/examples/dumpit5.php
-
62ext/tidy/examples/urlgrab.php
-
2ext/tidy/examples/urlgrab5.php
@ -1,93 +0,0 @@ |
|||
<?php |
|||
/* |
|||
* dumpit.php |
|||
* |
|||
* a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc. |
|||
* file as it is represented in the document model. |
|||
* |
|||
* By: John Coggeshall <john@php.net> |
|||
* |
|||
* Usage: php dumpit.php <filename> |
|||
*/ |
|||
|
|||
/* Refuse to run without a file argument: reading $_SERVER['argv'][1]
   unconditionally raises an undefined-index notice and hands an empty
   value to tidy_parse_file(). */
if (!isset($_SERVER['argv'][1]) || $_SERVER['argv'][1] === '') {
    echo "Usage: php dumpit.php <filename>\n";
    exit(1);
}

/* Parse the file named by the first command-line argument */
tidy_parse_file($_SERVER['argv'][1]);

/* Optionally you can do this here if you want to fix up the document */

/* tidy_clean_repair(); */

/* Dump the tree starting at the node returned by tidy_get_root(),
   then terminate the listing with a newline */
$tree = tidy_get_root();
dump_tree($tree);
echo "\n";
|||
|
|||
/**
 * Translate a TIDY_NODETYPE_* constant into a printable label.
 *
 * @param mixed $type Node type value to describe.
 * @return string The label of the first matching node type, or
 *                "Unknown Node" when no entry matches.
 */
function node_type($type) {
    /* Entries are tested top to bottom with a loose (==) comparison */
    $labels = array(
        TIDY_NODETYPE_ROOT     => "Root Node",
        TIDY_NODETYPE_DOCTYPE  => "DocType Node",
        TIDY_NODETYPE_COMMENT  => "Comment Node",
        TIDY_NODETYPE_PROCINS  => "ProcIns Node",
        TIDY_NODETYPE_TEXT     => "Text Node",
        TIDY_NODETYPE_START    => "Start Node",
        TIDY_NODETYPE_END      => "End Node",
        TIDY_NODETYPE_STARTEND => "Start/End Node",
        TIDY_NODETYPE_CDATA    => "CDATA Node",
        TIDY_NODETYPE_SECTION  => "Section Node",
        TIDY_NODETYPE_ASP      => "ASP Source Code Node",
        TIDY_NODETYPE_PHP      => "PHP Source Code Node",
        TIDY_NODETYPE_JSTE     => "JSTE Source Code",
        TIDY_NODETYPE_XMLDECL  => "XML Declaration Node"
    );

    foreach ($labels as $candidate => $label) {
        if ($type == $candidate) {
            return $label;
        }
    }

    return "Unknown Node";
}
|||
|
|||
/**
 * Print $string preceded by one space per indent step.
 *
 * @param string $string Text to print after the padding.
 * @param int    $indent Number of padding spaces to emit first.
 */
function do_leaf($string, $indent) {
    $padding = "";

    /* Count down instead of up; the number of iterations is the same */
    for ($remaining = $indent; $remaining > 0; $remaining--) {
        $padding .= " ";
    }

    echo $padding, $string;
}
|||
|
|||
/**
 * Recursively print a node and everything beneath it.
 *
 * For each node this prints its upper-cased name and node-type label,
 * its value when it is a text node, its attributes, and then each child
 * three indent steps deeper.
 *
 * @param object $node   Node to dump; a falsy value prints nothing.
 * @param int    $indent Indent passed to do_leaf() for this node's lines.
 */
function dump_tree($node, $indent = 0) {
    if (!$node) {
        return;
    }

    /* Put something there if the node name is empty */
    $nodename = trim(strtoupper($node->name));
    if (empty($nodename)) {
        $nodename = "[EMPTY]";
    }

    /* Header line: the node name followed by its pretty type label */
    do_leaf(" + " . $nodename . " (" . node_type($node->type) . ")\n", $indent);

    /* Text nodes are generated by start/end tags and contain the text in
       between, i.e. <B>foo</B> will create a text node with $node->value
       equal to 'foo' */
    if ($node->type == TIDY_NODETYPE_TEXT) {
        do_leaf(" |\n", $indent);
        do_leaf(" +---- Value: '" . $node->value . "'\n", $indent);
    }

    /* List every attribute with its value, if the node has any */
    if (count($node->attributes()) > 0) {
        do_leaf(" |\n", $indent);
        do_leaf(" +---- Attributes\n", $indent);

        foreach ($node->attributes() as $attrib) {
            do_leaf(" +--" . $attrib->name . "\n", $indent);
            do_leaf(" | +-- Value: " . $attrib->value . "\n", $indent);
        }
    }

    /* Nothing below this node: done */
    if (!$node->has_children()) {
        return;
    }

    /* Recurse along the children to generate the remaining nodes */
    foreach ($node->children() as $child) {
        dump_tree($child, $indent + 3);
    }
}
|||
|
|||
/* Print whatever tidy_get_output() returns, after the tree dump */
$output = tidy_get_output();
echo $output;
|||
|
|||
?>
|
|||
@ -1,62 +0,0 @@ |
|||
<?php |
|||
|
|||
/* |
|||
* urlgrab.php |
|||
* |
|||
* A simple command-line utility to extract all of the URLS contained |
|||
* within <A HREF> tags from a document. |
|||
* |
|||
* NOTE: Only works with tidy for PHP 4.3.x, please see urlgrab5.php for tidy for PHP 5 |
|||
* |
|||
* By: John Coggeshall <john@php.net> |
|||
* |
|||
* Usage: php urlgrab.php <file> |
|||
* |
|||
*/ |
|||
|
|||
/* Refuse to run without a file argument: reading $_SERVER['argv'][1]
   unconditionally raises an undefined-index notice and hands an empty
   value to tidy_parse_file(). */
if (!isset($_SERVER['argv'][1]) || $_SERVER['argv'][1] === '') {
    echo "Usage: php urlgrab.php <file>\n";
    exit(1);
}

/* Parse the document */
tidy_parse_file($_SERVER['argv'][1]);

/* Fix up the document */
tidy_clean_repair();

/* Get an object representing everything from the <HTML> tag in */
$html = tidy_get_html();

/* Traverse the document tree and print every link that was collected */
print_r(get_links($html));
|||
|
|||
/**
 * Collect the HREF values of every <A> node at or below $node.
 *
 * @param object $node Node to inspect; its children are visited recursively.
 * @return array List of HREF values, the node's own (if any) first,
 *               followed by those of its descendants in child order.
 */
function get_links($node) {
    /* Only an <A> tag is asked for its HREF attribute */
    $attrib = ($node->id == TIDY_TAG_A) ? $node->get_attr(TIDY_ATTR_HREF) : null;
    $urls = $attrib ? array($attrib->value) : array();

    /* Leaf node: nothing further to traverse */
    if (!$node->has_children()) {
        return $urls;
    }

    /* Append each child's results in order */
    foreach ($node->children() as $child) {
        $urls = array_merge($urls, get_links($child));
    }

    return $urls;
}
|||
|
|||
?>
|
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue