|
printable version
- email this article
Spam class refactoring
by johnk
Saturday, Oct. 21, 2006 at 1:21 AM
This isn't a full refactor...
This is a slightly refactored version of spam_class.inc, called spamb_class.inc. It just splits the Detect() code into individual methods. That way, it can be used to check spam on other parts of the site (or I guess in general). <pre> include_once("shared/global.cfg");
/**
 * This is a lot like Spam, except it'll operate on arbitrary strings, not just articles.
 * The different spam detection methods are in separate functions.
 *
 * Every data file used here lives under SF_CACHE_PATH (a constant that is
 * expected to be defined by the including code, e.g. shared/global.cfg).
 */
class SpamB {

    /**
     * PHP4-style constructor, kept for backward compatibility.
     * There is no state to initialise.
     */
    function SpamB () {
        // Nothing
    }

    /**
     * Check text against the patterns listed in spam_strings.txt.
     *
     * Each non-empty line of the file is used as a case-insensitive regular
     * expression and tested against every line of $data separately.
     *
     * @param string $data text to scan (taken by reference, never modified)
     * @return int 1 if any pattern matches any line, 0 otherwise (also 0 when
     *             the pattern file cannot be read)
     */
    function detect_strings( &$data ) {
        $stringstext = file_get_contents(SF_CACHE_PATH.'/spam_strings.txt');
        if ( $stringstext === false ) {
            return 0;
        }

        // Split the input once; it does not change between patterns.
        $lines = explode( "\n", $data );

        foreach ( explode( "\n", $stringstext ) as $string ) {
            $string = rtrim( $string );
            if ( ! $string ) continue;

            // NOTE(review): the pattern is interpolated verbatim, so a line
            // containing "/" in spam_strings.txt yields an invalid regex.
            foreach ( $lines as $line ) {
                if ( preg_match( "/$string/i", $line ) ) {
                    return 1;
                }
            }
        }
        return 0;
    }

    /**
     * Check an IP address against next_ip_to_block.txt and log a hit.
     *
     * @param string $ip address to test; when empty, the current request's
     *                   REMOTE_ADDR is used instead
     * @return int 1 if the address matches a blocklist entry, 0 otherwise
     */
    function detect_ip( $ip ) {
        $lines = file(SF_CACHE_PATH.'/next_ip_to_block.txt');
        if ( $lines === false ) {
            return 0;
        }

        $user_ip = trim( $ip ? $ip : $this->_server_var('REMOTE_ADDR') );

        foreach ( $lines as $line ) {
            $saved_ip = trim( $line );
            // A blank line would otherwise become the pattern /^$/.
            if ( $saved_ip === '' ) continue;

            // Entries are treated as regular expressions, as before.
            // NOTE(review): an unescaped "." in a plain IP entry matches any
            // character - confirm whether entries are meant to be patterns.
            if ( preg_match( '/^'.$saved_ip.'$/', $user_ip ) ) {
                $this->Log(
                    $user_ip."|".
                    date("m-d-y g:ia")."|".
                    $this->_server_var('HTTP_USER_AGENT')."|".
                    $this->_server_var('HTTP_REFERER')."|".
                    $this->_server_var('REQUEST_URI')
                );
                return 1;
            }
        }
        return 0;
    }

    /**
     * Record the md5 of $data in hashes.txt, one hash per line.
     *
     * When hashes_time is missing or older than $GLOBALS['spam_filter_time']
     * seconds, hashes.txt and hashes_content.txt are emptied first and
     * hashes_time is refreshed.
     *
     * not tested
     *
     * NOTE(review): this appends to hashes.txt while detect_repeated_content()
     * reads hashes_content.txt - confirm which file is intended.
     *
     * @param string $data content to remember (taken by reference, never modified)
     */
    function remember_content( &$data ) {
        $hashes_file  = SF_CACHE_PATH.'/hashes.txt';
        $content_file = SF_CACHE_PATH.'/hashes_content.txt';
        $time_file    = SF_CACHE_PATH.'/hashes_time';

        $expired = ! file_exists( $time_file )
            || $GLOBALS['spam_filter_time'] < ( time() - filectime( $time_file ) );

        if ( $expired ) {
            foreach ( array( $hashes_file, $content_file ) as $file ) {
                if ( file_exists( $file ) ) {
                    unlink( $file );
                }
                touch( $file );
            }
            touch( $time_file );
        }

        $hashes = fopen( $hashes_file, 'a' );
        if ( $hashes === false ) {
            return;
        }
        fputs( $hashes, md5($data)."\n" );
        fclose( $hashes );
    }

    /**
     * Count how many lines of hashes_content.txt equal the md5 of $data.
     *
     * not tested
     *
     * @param string $data content to look up (taken by reference, never modified)
     * @return int the number of matches (0 when the file cannot be read)
     */
    function detect_repeated_content( &$data ) {
        $hashes = file(SF_CACHE_PATH.'/hashes_content.txt');
        if ( $hashes === false ) {
            return 0;
        }

        $dataHash = md5($data);
        $matched_hashes = 0;
        foreach ( $hashes as $hash ) {
            // file() keeps the line terminator, so strip it before comparing.
            if ( trim( $hash ) === $dataHash ) $matched_hashes++;
        }
        return $matched_hashes;
    }

    /**
     * Append a timestamped line to ipblock.log.
     *
     * @param string $text message to record
     */
    function Log ($text) {
        $log = fopen(SF_CACHE_PATH."/ipblock.log","a");
        if ( $log === false ) {
            return;
        }
        fwrite($log, date("m-d-y g:ia").": ".$text."\n");
        fclose($log);
    }

    /**
     * Decide whether a string consists mostly of links.
     *
     * URL-like fragments ("http:..." / "https:...", "www...", and the words
     * "href" and "url") are stripped; if less than half of the original
     * length remains, the string is considered to contain too many URLs.
     *
     * @param string $str text to inspect
     * @return int 1 if more than half of the text was link material, else 0
     *             (an empty string yields 0)
     */
    function too_many_urls( $str ) {
        $str = (string) $str;
        $originalLength = strlen( $str );
        if ( $originalLength === 0 ) {
            return 0; // nothing to measure; also avoids division by zero
        }

        // Greedy quantifiers: the lazy "+?" form removed only one character.
        $stripped = preg_replace( '/https?:[a-zA-Z0-9.\/]+/', '', $str );
        $stripped = preg_replace( '/href/', '', $stripped );
        $stripped = preg_replace( '/url/', '', $stripped );
        $stripped = preg_replace( '/www[a-zA-Z0-9.\/]+/', '', $stripped );

        if ( ( strlen( $stripped ) / $originalLength ) < 0.5 ) return 1;
        else return 0;
    }

    /**
     * Read a $_SERVER entry, returning '' when it is not set (for example
     * HTTP_REFERER on direct requests).
     */
    function _server_var( $key ) {
        return isset( $_SERVER[$key] ) ? $_SERVER[$key] : '';
    }
}
</pre>
LATEST COMMENTS ABOUT THIS ARTICLE
Listed below are the 10 latest comments of 3 posted about this article.
These comments are anonymously submitted by SF-IMC website visitors.
|
|
|