NEED A GREAT WEB SITE? NEED IT TO BE SEARCH-ENGINE-FRIENDLY?
SEO Egghead is a web development firm dedicated to creating custom, search-engine-optimized web site applications. We specialize in eCommerce and content management web sites that not only present information beautifully to human visitors, but also satisfy the "third browser" — the search engine. To us, search engines are people too.
Click here to talk to us. We'd love to help!
// SimpleCloak: spider detection backed by two MySQL tables.
//
//   cloak_data   (id, spider_name, record_type, value) - one row per known
//                user agent ('UA') or IP address ('IP') of a spider
//   cloak_update (version, updated_on) - bookkeeping for the last refresh
//
// The connection settings are read from the DB_HOST, DB_USER, DB_PASSWORD and
// DB_DATABASE constants, which must be defined before the class is used.
// Methods are written PHP 4 style (no visibility/static keywords) and call
// each other as SimpleCloak::method().
class SimpleCloak
{
  // Returns a confidence level that the current visitor is the named spider.
  //
  //   $spider_name - spider to test for; '' matches any spider in the table
  //   $check_uas   - when true, compare the request's user agent
  //   $check_ips   - when true, compare the request's remote address
  //
  // A user agent match adds 2 and an IP match adds 3, so the result is one
  // of 0, 2, 3 or 5.
  function isSpider($spider_name = '', $check_uas = true, $check_ips = true)
  {
    // default confidence level to 0
    $confidence = 0;

    // _get() silently drops any filter whose value is empty, which would make
    // every UA/IP record of the spider "match". A request without a user
    // agent (or without a remote address, e.g. on the command line) must
    // therefore be skipped here instead of being looked up.

    // matching user agent?
    if ($check_uas && !empty($_SERVER['HTTP_USER_AGENT']))
    {
      if (SimpleCloak::_get(0, $spider_name, 'UA',
                            $_SERVER['HTTP_USER_AGENT']))
      {
        $confidence += 2;
      }
    }

    // matching IP?
    if ($check_ips && !empty($_SERVER['REMOTE_ADDR']))
    {
      if (SimpleCloak::_get(0, $spider_name, 'IP', '',
                            $_SERVER['REMOTE_ADDR']))
      {
        $confidence += 3;
      }
    }

    // return confidence level
    return $confidence;
  }

  // Retrieves cloaking data filtered by the supplied parameters.
  //
  // Every parameter is optional; an empty (falsy) value means "do not filter
  // on this column".
  //
  //   $id             - exact match on cloak_data.id
  //   $spider_name    - exact match on spider_name
  //   $record_type    - exact match on record_type ('UA' or 'IP')
  //   $value          - exact match on value
  //   $wildcard_value - matches rows whose value equals it, or whose value
  //                     followed by '.' is a prefix of it (so a stored
  //                     '66.249' matches '66.249.64.1')
  //
  // Returns an array of associative arrays, one per matching row; the array
  // is empty when nothing matches or when the query fails. Opens its own
  // connection and closes it before returning; dies if the server or the
  // database cannot be reached.
  function _get($id = 0, $spider_name = '', $record_type = '',
                $value = '', $wildcard_value = '')
  {
    // by default, retrieve all records
    $q = " SELECT cloak_data.* FROM cloak_data WHERE TRUE ";

    // add filters
    if ($id)
    {
      $id = (int) $id;
      $q .= " AND id = $id ";
    }
    if ($spider_name)
    {
      $spider_name = mysql_escape_string($spider_name);
      $q .= " AND spider_name = '$spider_name' ";
    }
    if ($record_type)
    {
      $record_type = mysql_escape_string($record_type);
      $q .= " AND record_type = '$record_type' ";
    }
    if ($value)
    {
      $value = mysql_escape_string($value);
      $q .= " AND value = '$value' ";
    }
    if ($wildcard_value)
    {
      $wildcard_value = mysql_escape_string($wildcard_value);
      $q .= " AND ( '$wildcard_value' = value OR " .
            "'$wildcard_value' LIKE CONCAT(value, '.%') ) ";
    }

    // Connect to MySQL server
    $dbLink = mysql_connect(DB_HOST, DB_USER, DB_PASSWORD)
              or die("Could not connect: " . mysql_error());

    // Connect to the seophp database
    mysql_select_db(DB_DATABASE) or die("Could not select database");

    // execute the query
    $tmp = mysql_query($q);

    // collect the results as an associative array; mysql_query() returns
    // false on failure, in which case there is nothing to fetch
    $rows = array();
    if ($tmp)
    {
      while ($_x = mysql_fetch_assoc($tmp))
      {
        $rows[] = $_x;
      }
    }

    // close database connection only after the rows have been read
    mysql_close($dbLink);

    return $rows;
  }

  // Records a fresh iplists version in cloak_update, but only if our data is
  // more than 7 days old and a version string could be downloaded.
  //
  // Returns false when no update is needed or possible (data is recent, the
  // version could not be read, or the stored version is already current) and
  // true when a newer version was recorded.
  //
  // NOTE(review): nothing in this method calls _updateCloakingDB(), so the
  // per-spider tables are not refreshed here - confirm whether the caller is
  // expected to do that after a true result.
  function updateAll()
  {
    // Connect to MySQL server
    $dbLink = mysql_connect(DB_HOST, DB_USER, DB_PASSWORD)
              or die("Could not connect: " . mysql_error());

    // Connect to the seophp database
    mysql_select_db(DB_DATABASE) or die("Could not select database");

    // retrieve last update information from database; the table may be
    // empty (first run), in which case both values stay null
    $q = "SELECT cloak_update.* FROM cloak_update";
    $tmp = mysql_query($q);
    $updated = $tmp ? mysql_fetch_assoc($tmp) : false;
    $db_version = $updated ? $updated['version'] : null;
    $updated_on = $updated ? $updated['updated_on'] : null;

    // if the latest update is more recent than 7 days (604800 seconds),
    // don't attempt an update
    if (isset($updated_on) &&
        (strtotime($updated_on) > strtotime("-604800 seconds")))
    {
      // close database connection
      mysql_close($dbLink);

      // return false to indicate an update wasn't performed
      return false;
    }

    // read the latest iplists version
    $version_url = 'http://www.iplists.com/nw/version.php';
    $latest_version = mysql_escape_string(file_get_contents($version_url));

    // if no updated version information was retrieved, abort
    if (!$latest_version)
    {
      // close database connection, as every other early exit does
      mysql_close($dbLink);

      // return false to indicate an update wasn't performed
      return false;
    }

    // save the update data
    $q = "DELETE FROM cloak_update";
    mysql_query($q);
    $q = "INSERT INTO cloak_update (version, updated_on) " .
         "VALUES('$latest_version', NOW())";
    mysql_query($q);

    // if we already have the current data, don't attempt an update
    if ($latest_version == $db_version)
    {
      // close database connection
      mysql_close($dbLink);

      // return false to indicate an update wasn't performed
      return false;
    }

    // The connection is deliberately left open on this path:
    // _updateCloakingDB() issues queries without connecting itself, so a
    // caller that refreshes spider data right after a true result may rely
    // on this link. PHP releases it at the end of the request.

    // return true to indicate a successful update
    return true;
  }

  // Updates the database for the mentioned spider by reading the provided
  // URL.
  //
  //   $spider_name - spider whose cloak_data rows are replaced
  //   $url         - location of the list to download
  //   $ua_regex    - pattern whose first capture group is a user agent
  //   $ip_regex    - pattern whose first capture group is an IP address
  //
  // Returns nothing. The existing rows are only deleted once both a user
  // agent list and an IP list have been parsed, so a failed download leaves
  // the current data untouched. This method does not connect to MySQL
  // itself; a connection must already be open when it is called.
  function _updateCloakingDB($spider_name, $url,
                             $ua_regex = '/^# UA "(.*)"$/m',
                             $ip_regex = '/^([0-9.]+)$/m')
  {
    // use cURL to read the data from $url
    // NOTE: additional settings are required when accessing the web
    // through a proxy
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);
    $result = curl_exec($ch);
    curl_close($ch);

    // curl_exec() returns false when the transfer fails; there is nothing
    // to parse in that case, so we cancel the update
    if ($result === false)
    {
      return;
    }

    // use _parseListURL to parse the list of IPs and user agents
    $lists = SimpleCloak::_parseListURL($result, $ua_regex, $ip_regex);

    // if the user agents and IPs weren't retrieved, we cancel the update
    if (!$lists['ua_list'] || !$lists['ip_list'])
    {
      return;
    }

    // lock the cloak_data table to avoid concurrency problems
    mysql_query('LOCK TABLES cloak_data WRITE');

    // delete all the existing data for $spider_name
    SimpleCloak::_deleteSpiderData($spider_name);

    // insert the list of user agents for the spider
    foreach ($lists['ua_list'] as $ua)
    {
      SimpleCloak::_insertSpiderData($spider_name, 'UA', $ua);
    }

    // insert the list of IPs for the spider
    foreach ($lists['ip_list'] as $ip)
    {
      SimpleCloak::_insertSpiderData($spider_name, 'IP', $ip);
    }

    // release the table lock
    mysql_query('UNLOCK TABLES');
  }

  // Helper function used to parse lists of user agents and IPs.
  //
  //   $data     - text to scan
  //   $ua_regex - pattern whose first capture group is a user agent
  //   $ip_regex - pattern whose first capture group is an IP address
  //
  // Returns array('ua_list' => ..., 'ip_list' => ...), each holding the
  // first-group captures of every match (an empty array when nothing
  // matched).
  function _parseListURL($data, $ua_regex, $ip_regex)
  {
    $ua_list = array();
    $ip_list = array();
    preg_match_all($ua_regex, $data, $ua_list);
    preg_match_all($ip_regex, $data, $ip_list);

    // index 1 holds the first capture group; it is absent when a pattern
    // has no group or could not be applied
    return array(
      'ua_list' => isset($ua_list[1]) ? $ua_list[1] : array(),
      'ip_list' => isset($ip_list[1]) ? $ip_list[1] : array()
    );
  }

  // Inserts a new row of data to the cloaking table.
  //
  //   $spider_name - spider the record belongs to
  //   $record_type - 'UA' or 'IP'
  //   $value       - the user agent string or IP address
  //
  // Uses the currently open MySQL connection; returns nothing.
  function _insertSpiderData($spider_name, $record_type, $value)
  {
    // escape input data
    $spider_name = mysql_escape_string($spider_name);
    $record_type = mysql_escape_string($record_type);
    $value = mysql_escape_string($value);

    // build and execute the INSERT query
    $q = "INSERT INTO cloak_data (spider_name, record_type, value) " .
         "VALUES ('$spider_name', '$record_type', '$value')";
    mysql_query($q);
  }

  // Deletes the cloaking data for the mentioned spider.
  //
  //   $spider_name - spider whose cloak_data rows are removed
  //
  // Uses the currently open MySQL connection; returns nothing.
  function _deleteSpiderData($spider_name)
  {
    // escape input data
    $spider_name = mysql_escape_string($spider_name);

    // build and execute the DELETE query
    $q = "DELETE FROM cloak_data WHERE spider_name='$spider_name'";
    mysql_query($q);
  }
}
?>
Search engine optimization is not only the job of the marketing staff. It must be considered from a web site's inception and throughout its lifetime by the web site developer. Professional Search Engine Optimization with PHP provides developers with the information they need to create and maintain a search-engine-friendly web site, and avoid common pitfalls that confuse search engine spiders.