Title spider

« see it in action | download title-spider.rar

<?php
/*
 * Title spider.
 *
 * Shows a form that accepts a list of urls (one per line). On POST, each url
 * is fetched and the content of its <title> element is printed together with
 * the url. Urls that could not be loaded are marked "failed" (red), pages
 * without a title are marked "untitled" (orange).
 *
 * Usage is throttled globally: the time of the last accepted post is stored in
 * a small text file next to this script and a new post is refused until
 * $timeBeforeNextPost seconds have passed.
 */

// ---- Configuration ---------------------------------------------------------
$timeBeforeNextPost = 100; // Here you set how many seconds users have to wait between use.
$maxurls = 100;            // Here you set how many urls the user may post at a time.
$fetchTimeout = 10;        // Seconds to wait for one remote page before giving up on it.
$timeFile = 'this file holds the time of the last post.txt';

/**
 * Escape a string for safe output inside HTML text or attribute values.
 *
 * @param string $text Raw, untrusted text.
 * @return string HTML-escaped text.
 */
function titlespider_escape($text)
{
    // ENT_SUBSTITUTE keeps the output non-empty when the text is not valid UTF-8.
    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
}

$gettime = time();

// Time of the last accepted post; 0 means the tool has not been used yet.
$lastposttime = 0;
if (is_readable($timeFile)) {
    $lastposttime = (int) file_get_contents($timeFile);
}

// Check if anything at all was posted and get the url list.
$isPost = ($_SERVER['REQUEST_METHOD'] == "POST");
$urllistb = '';
if ($isPost && isset($_REQUEST["urllist"]) && is_string($_REQUEST["urllist"])) {
    $urllistb = $_REQUEST["urllist"];
}
?>
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Get the titles of html documents</title>
  <style>
    body { background: white; color: black; }
    .logo { margin-top: 2px; }
    .failed { color: red; }
    .untitled { color: orange; }
  </style>
</head>
<body>
  <img class="logo" src="titlespider.png" alt="Title spider">
  <p>
    Insert a list of urls and this tool will try to gather the title with each.
    Insert the urls one per line. Red urls didn't work, orange urls didn't have a title.
  </p>
  <form name="gettitleservice" action="index.php" method="post">
    <p><label for="urllist">Urls, one per line</label></p>
    <textarea id="urllist" name="urllist" cols="190" rows="20"><?php echo titlespider_escape($urllistb); ?></textarea>
    <p><button type="submit">Submit</button></p>
  </form>
<?php
if ($isPost) {
    // Turn the url list into an array. Browsers submit textarea content with
    // CRLF line endings, so split on every newline convention instead of
    // PHP_EOL, and drop blank lines and surrounding whitespace.
    $urlArray = array();
    foreach (preg_split('/\r\n|\r|\n/', $urllistb) as $line) {
        $line = trim($line);
        if ($line !== '') {
            $urlArray[] = $line;
        }
    }

    $secondsSinceLastPost = $gettime - $lastposttime;

    if ($secondsSinceLastPost < $timeBeforeNextPost) {
        // Not enough time has expired since the tool was last used. The posted
        // list stays in the textarea above, so nothing is lost.
        echo '<p>It was less than ' . $timeBeforeNextPost . ' seconds ago when the tool was last used. I have to limit usage a bit sorry. You will have to wait exactly: ' . ($timeBeforeNextPost - $secondsSinceLastPost) . ' seconds.</p>';
    } elseif (count($urlArray) > $maxurls) {
        // The user is perhaps over excited.
        echo '<p>sorry, ' . $maxurls . ' urls is the maximum.</p>';
    } elseif (count($urlArray) > 0) {
        // Remember when this post was accepted so the next one can be throttled.
        if (file_put_contents($timeFile, (string) $gettime, LOCK_EX) === false) {
            error_log('title spider: could not update "' . $timeFile . '"');
        }

        // Do not let one slow host stall the whole list.
        $context = stream_context_create(array('http' => array('timeout' => $fetchTimeout)));

        // Loop through the array of links.
        foreach ($urlArray as $theurl) {
            // Replace a leading https with http (only the scheme, never text
            // further down the url such as a query string).
            $theurl = preg_replace('#^https://#i', 'http://', $theurl);
            $safeUrl = titlespider_escape($theurl);

            // The url finding process may take ages therefore we send the
            // unfinished page to the browser.
            flush();

            // Load the html. Only plain http urls are fetched: passing raw user
            // input to file_get_contents() would otherwise also open local
            // files and other stream wrappers.
            // NOTE(review): hosts on the internal network are still reachable
            // through this tool; restrict them at the network level if needed.
            $thepage = false;
            if (preg_match('#^http://#i', $theurl)) {
                $thepage = @file_get_contents($theurl, false, $context);
            }

            // If we didn't find any page at all we print the url in red.
            if ($thepage === false) {
                echo '<p class="failed">' . $safeUrl . ' (could not be loaded)</p>';
                continue;
            }

            // Life is hard enough the way it is, lets just lowercase everything.
            $thepage = strtolower($thepage);

            // Find the first title element (attributes and line breaks allowed)
            // and reduce it to a single line of plain text.
            $title = '';
            if (preg_match('/<title\b[^>]*>(.*?)<\/title>/s', $thepage, $matches)) {
                $title = trim(preg_replace('/\s+/', ' ', strip_tags($matches[1])));
            }

            if ($title === '') {
                // If we didn't find a title in the document we print the url in orange.
                echo '<p class="untitled">' . $safeUrl . ' (no title)</p>';
            } else {
                // Escape the title but keep entities the page already encoded
                // (double_encode = false), then write things back to the page.
                $safeTitle = htmlspecialchars(ucfirst($title), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
                echo '<p>' . $safeTitle . '<br>' . $safeUrl . '</p>';
            }
        }
    }
}
?>
</body>
</html>