# -----------------------------------------------------------------------------
# Section 1: robots.txt record
#
# Applies to every crawler ("*"). The Allow lines are listed before the broader
# "Disallow: /opten/" so that the three exceptions under /opten/ remain
# crawlable while the rest of that tree is excluded.
#
# NOTE(review): this record uses robots.txt syntax while the directives in
# Section 2 use Apache mod_rewrite syntax. These are normally kept in separate
# files (robots.txt vs. httpd.conf/.htaccess) -- confirm where each part is
# actually deployed.
# -----------------------------------------------------------------------------
User-agent: *
Disallow: /cgi-bin/
Disallow: /tmp/
Allow: /opten/cegism/
Allow: /opten/cegtarlight/cegker.php
Allow: /opten/ttlight/optentt.php
Disallow: /opten/
Disallow: /opten-data/

# -----------------------------------------------------------------------------
# Section 2: Apache mod_rewrite rules
#
# Every request that matches one of the condition groups below is rewritten to
# /cgi-bin/bad.pl. Each directive must sit on its own line, and each comment
# must start its own line.
# -----------------------------------------------------------------------------

# Requests already flagged through the IS_ROBOT environment variable (set
# somewhere outside this section) are sent to bad.pl.
RewriteCond %{ENV:IS_ROBOT} true
# Disabled exemption for hosts resolving under googlebot.com:
#RewriteCond %{REMOTE_HOST} !\.googlebot\.com$ [NC]
RewriteRule .* /cgi-bin/bad.pl [L,T=application/x-httpd-cgi]

# The conditions below are chained with [OR]; the last one carries no OR flag,
# so a match on ANY single condition triggers the final RewriteRule.

# Robots known or highly suspected of collecting email addresses for spam
RewriteCond %{HTTP_USER_AGENT} ^(autoemailspider|Bullseye|CherryPicker|Crescent|ecollector|EmailCollector|Email.Extractor|EmailSiphon|EmailWolf|ExtractorPro|fastlwspider|.*LWP|Digger|.*hhjhj@yahoo|Microsoft.URL|Mozilla/3.Mozilla/2.01|Mozilla.*NEWT|NICErsPRO|SurfWalker|Telesoft|WebBandit|WebEMailExtrac|Zeus.*Webster) [NC,OR]

# Robots (sometimes called spiders) which regularly violate robots.txt
RewriteCond %{HTTP_USER_AGENT} ^(ADSARobot|.*almaden\.ibm|ASSORT|big.brother|bumblebee|Digimarc|FavOrg|FAST|.*fluffy|.*Girafabot|HomePageSearch|IncyWincy|Ingelin|NPBot|Openfind|OpenTextSiteCrawler|OrangeBot|Robozilla|ScoutAbout|.*searchhippo|searchterms\.it|sitecheck|UIowaCrawler|.*webcraft@bea\.com|WEBMASTERS|WhosTalking|WISEbot|Yandex) [NC,OR]

# Agents used for both good and bad purposes, such as sucking up bandwidth
# by downloading entire sites, or probing servers for security exploits.
RewriteCond %{HTTP_USER_AGENT} ^(ASPSeek|Deweb|Fetch|FlashGet|Getleft|GetURL|GetWebPage|.*HTTrack|KWebGet|libwww-perl|Mirror|NetAnts|NetCarta|netprospector|Net.Vampire|pavuk|PSurf|PushSite|reget|Rsync|Shai|SpiderBot|SuperBot|tarspider|Templeton|w3mir|web.by.mail|WebCopier|WebCopy|WebMiner|WebReaper|WebSnake|WebStripper|webvac|webwalk|WebZIP|Wget|XGET) [NC,OR]

# Miscellaneous (suspicious -- more information would be appreciated)
RewriteCond %{HTTP_USER_AGENT} ^(ah-ha|aktuelles|amzn_assoc|ATHENS|attache|bew|disco|.*DTS.Agent|Favorites.Sweeper|FEZhead|Generic|GetRight|go-ahead-got-it|.*Harvest|IBM_Planetwide|leech|MCspider|NetResearchServer|nost\.info|OpaL|PackRat|RepoMonkey|.*Rover|Spegla|SqWorm|.*TrueRobot|UtilMind|vspider|.*WUMPUS) [NC,OR]

# Blank user agent (empty or a single "-"), or a user agent consisting of
# exactly ten uppercase letters. This condition is deliberately case-sensitive
# (no NC flag).
RewriteCond %{HTTP_USER_AGENT} ^(-?|[A-Z]{10})$ [OR]

# A host which tries to hide itself in reverse DNS lookup
RewriteCond %{REMOTE_HOST} ^private$ [NC,OR]

# Web surveying sites (may require using ipchains)
RewriteCond %{HTTP_REFERER} (traffixer|netfactual|netcraft)\.com [NC,OR]
RewriteCond %{REMOTE_HOST} \.netcraft\.com$ [NC,OR]

# A fake referrer that's often used -- use this unless your pages are related
# in some way to atomic energy and could really be linked to from www.iaea.org
# ("^[^?]*" restricts the match to the part of the referrer before any "?").
RewriteCond %{HTTP_REFERER} ^[^?]*iaea\.org [NC,OR]

# "addresses.com" is a referrer used by an email address extractor
RewriteCond %{HTTP_REFERER} ^[^?]*addresses\.com [NC,OR]

# A fake referrer that's used in conjunction with formmail exploits
RewriteCond %{HTTP_REFERER} ^[^?]*\.ideography\.co\.uk [NC]

# The rule which blocks out further access from the host: any requested path
# is rewritten to /cgi-bin/bad.pl, and [L] stops further rule processing.
RewriteRule .* /cgi-bin/bad.pl [L,T=application/x-httpd-cgi]