idtstudios2
Programmer
The following code is supposed to search a page ($html) for tags. It starts out by gathering all links (<a></a>) and then checks to see if rel="tag". It then returns all tags in the following format: hi,wall,brick
Now, the script "technically" works except for the fact that it is processing every link, instead of just the ones with rel="tag". I'm pretty sure that the problem has to deal with a regular expression mistake however I can't find it. Any help or input would be appreciated. thanks.
function get_tag($url)
{
$p_url = parse_url($url);
if (!empty($p_url['path'])):
$path = explode ("/", trim ($p_url['path'], "/")); //gets rid of starting and trailing slash then splits the path string into an array separated by the slash
return ($path[count($path)-1]); // returns the last element of the array
else:
die("error");
endif;
}
function get_tags($html, $url){
if(!$html or !$location){
return false;
}else{
#search through the HTML, save all <link> tags
# and store each link's attributes in an associative array
preg_match_all('/<a\s+(.*?)\s*\/?>/si', $html, $matches);
$links = $matches[1];
$final_links = array();
$link_count = count($links);
for($n=0; $n<$link_count; $n++){
$attributes = preg_split('/\s+/s', $links[$n]);
foreach($attributes as $attribute){
$att = preg_split('/\s*=\s*/s', $attribute, 2);
if(isset($att[1])){
$att[1] = preg_replace('/([\'"]?)(.*)\1/', '$2', $att[1]);
$final_link[strtolower($att[0])] = $att[1];
}
}
$final_links[$n] = $final_link;
}
#now figure out which one contains the directory tag
for($n=1; $n<$link_count; $n++){
if(strtolower($final_links[$n]['rel']) == 'tag'){
$href = $final_links[$n]['href'];
$temp_tag = get_tag($href);
if ($temp_tag == "error") {
//do nothing
} else {
$tags = $tags . "" . $temp_tag . ",";
}
}
}
if(substr($tags, -1) == ','){
$tags = rtrim($tags, ",");
}
return $tags;
}
}
Now, the script "technically" works except for the fact that it is processing every link, instead of just the ones with rel="tag". I'm pretty sure that the problem has to deal with a regular expression mistake however I can't find it. Any help or input would be appreciated. thanks.
function get_tag($url)
{
$p_url = parse_url($url);
if (!empty($p_url['path'])):
$path = explode ("/", trim ($p_url['path'], "/")); //gets rid of starting and trailing slash then splits the path string into an array separated by the slash
return ($path[count($path)-1]); // returns the last element of the array
else:
die("error");
endif;
}
function get_tags($html, $url){
if(!$html or !$location){
return false;
}else{
#search through the HTML, save all <link> tags
# and store each link's attributes in an associative array
preg_match_all('/<a\s+(.*?)\s*\/?>/si', $html, $matches);
$links = $matches[1];
$final_links = array();
$link_count = count($links);
for($n=0; $n<$link_count; $n++){
$attributes = preg_split('/\s+/s', $links[$n]);
foreach($attributes as $attribute){
$att = preg_split('/\s*=\s*/s', $attribute, 2);
if(isset($att[1])){
$att[1] = preg_replace('/([\'"]?)(.*)\1/', '$2', $att[1]);
$final_link[strtolower($att[0])] = $att[1];
}
}
$final_links[$n] = $final_link;
}
#now figure out which one contains the directory tag
for($n=1; $n<$link_count; $n++){
if(strtolower($final_links[$n]['rel']) == 'tag'){
$href = $final_links[$n]['href'];
$temp_tag = get_tag($href);
if ($temp_tag == "error") {
//do nothing
} else {
$tags = $tags . "" . $temp_tag . ",";
}
}
}
if(substr($tags, -1) == ','){
$tags = rtrim($tags, ",");
}
return $tags;
}
}