# vim: set syntax=perl ts=4 ai si:

######################### IMPORTANT! - READ CAREFULLY ########################
# This file contains some parts of my own cleanfeed.local.
# Many of these checks are more content-based than I feel comfortable
# putting in the official source, and others are just experimental or
# need site-specific tweaks.
# Before using any of this code please *think*, and be sure you really
# understand what it does.
######################### IMPORTANT! - READ CAREFULLY ########################

# Strings that appear between parentheses after the "AspNNTP <version>" token
# of the X-Newsreader header and that identify known bots.  The list is only
# used to build the %badaspnntp lookup hash consulted by local_filter_bot().
my @badaspnntps = (
    'PostIT Now',
    'Jobsearch Limited',
    'AudioWeb',                 # audioweb.com
    'Alex',                     # sex spam
    'Paul Simmons',             # OperationIT.com
    'Alan',                     # equest.com
    'Digital Media Works',      # html sex spam
    'Captive Technology',       # ccsscorp.com jobs flood
    'Computer Horzions ISG',    # isgjobs.com jobs flood
    'Mike Powers',              # ResumeGateway.com jobs flood
);
my %badaspnntp = map { $_ => 1 } @badaspnntps;
undef @badaspnntps;             # the hash is all we need from here on

# Site/company specific checks on the From, Message-ID and Subject headers.
#
# Reads the globals %hdr and %lch (%lch presumably holds lowercased header
# values - confirm against the cleanfeed version in use).
# Returns whatever reject() returns as soon as one rule matches, or '' when
# no rule matches.
sub local_filter_first {
#   my $localpost = 0;
#
#   $localpost = 1 if $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.visyn\.net /;
#
#   study $hdr{__BODY__} if $hdr{__LINES__} <= 250;
#
#   # local posts ############################################################
#   if ($localpost) {
#       if ($hdr{Approved}) {
#           foreach (@groups) {
#               if (not /^(?:alt|wind|inwind)\./) {
#                   saveart('CF.approved');
#                   return reject("Forged approval in $_ ($hdr{Approved})");
#               }
#           }
#       }
#
##      if ($hdr{Subject} =~ /^R: /) { saveart('L.r'); }
#
#       return reject('Non usare HTML in Usenet!')
#           if $hdr{'Content-Type'} and ($hdr{'Content-Type'} =~ m#text/html#
#               or $hdr{'Content-Type'} =~ m#multipart/alternative#);
#   }
    ##########################################################################

#   # save articles coming from broken sites so I can LART them
#   foreach (@groups) {
#       next unless $hdr{Newsgroups} =~ /^it/;
#       $gr{it}++;
#       saveart('W.nomod') if exists $Moderated{$_} and not $hdr{Approved};
#   }

#   # enforce it.* hierarchy restrictions
#   if ($gr{it} and (@groups > 10 or @followups > 3)) {
#       saveart('CF.ECP');
#       return reject('Excessive crosspost');
#   }

    # specific sites or companies ############################################
    # $1 is the domain captured by whichever of the two matches succeeded.
    return reject("Job spam ($1)")
        if $hdr{From} =~ /@(ajilon\.ca|ntes\.com|trai\.com|lesliecorp\.com|topechelon\.net|ERecruitingWorld\.com|(?:data\.)?JobBankUSA\.com|resumes\.gojobs\.com|chemjobs\.net|eurosoft-inc\.com|newlonservices\.com|medzilla\.com|gisajob\.com|geologics\.com|brainhunter\.com|dsijobs\.com|offsitetechies\.com)\b/
        or $hdr{'Message-ID'} =~ /\@((?:webhire|hrsites|jobcircle|sans)\.com)>$/;

#   return reject("Google + gmail Spam ($1)")
#       if $hdr{From} =~ /\@gmail\.com/
#       and $hdr{'Message-ID'} =~ /\@googlegroups\.com>$/;

    return reject("Individual google spammer ($1)")
        if $hdr{From} =~ /(yasukoday|maudegen|ulyssestheo|maryleejanaych|pamalabrendanh|lalamarikoma|deliladarlee|maekrishna|justnbestG)\@gmail\.com\b/;

    return reject("MI5-Spam")
        if $lch{subject} =~ /^m.*i.*5.*p.*e.*r.*s.*e.*c.*u.*t.*i.*o.*n.*/;

    # NOTE(review): the short reason is spelled 'Bot Signature' here but
    # 'Bot signature' everywhere else in this file - confirm whether the
    # difference is intended before normalising it.
    return reject('NNTP Monitor', 'Bot Signature')
        if $hdr{From} =~ /^NNTP-Monitor\@/;

    # Explicit "accept" value, like the other hooks in this file, instead of
    # relying on the false value of the last failed match.
    return '';
}

# Rejects articles posted with AspNNTP when the text in parentheses of the
# X-Newsreader header is listed in %badaspnntp.
# Returns whatever reject() returns on a hit, '' otherwise.
sub local_filter_bot {
    my $newsreader = $hdr{'X-Newsreader'};

    if ($newsreader and $newsreader =~ /^AspNNTP \S+ \((.*)\)/) {
        return reject('AspNNTP', 'Bot signature')
            if exists $badaspnntp{$1};
        #saveart('W.aspnntp', $hdr{'X-Newsreader'});
    }

    return '';
}

# most articles with hashbusters are caught by the MD5 filter anyway, I need
# to check why these are not
#
# Looks for long runs of lowercase (or alphanumeric) characters in the body,
# optionally combined with a numeric suffix in the Subject.  Only articles
# with fewer than 250 lines and with $gr{reports} false are examined.
# Returns whatever reject() returns on a hit, '' otherwise.
sub local_filter_after_emp {
    if ($hdr{__LINES__} < 250 and not $gr{reports}) {
        # Stricter (shorter) thresholds for very short articles.
        if ($hdr{__LINES__} < 25) {
            return reject('lcbot 60+end+short', 'Bot signature')
                if $hdr{__BODY__} =~ /\n[a-z]{60,}\n+$/;
            return reject('lcbot 7+only+num', 'Bot signature')
                if $hdr{Subject} =~ / \d{4,5}/
                    and $hdr{__BODY__} =~ /^\n[a-z]{7,}\n+$/;
            return reject('lcbot 12+end+short+num', 'Bot signature')
                if $hdr{Subject} =~ / \d{2,5}$/
                    and $hdr{__BODY__} =~ /\n[a-z]{12,}\n+$/;
        }

        return reject('lcbot 100', 'Bot signature')
            if $hdr{__BODY__} =~ /^[a-z]{100,}$/m;
        return reject('lcbot 80+end', 'Bot signature')
            if $hdr{__BODY__} =~ /\n[a-z]{80,}\n+$/;
        return reject('lcbot 30+num', 'Bot signature')
            if $hdr{Subject} =~ / \d{2,5}$/
                and $hdr{__BODY__} =~ /^[a-z]{30,}$/m;

        # Mixed-case alphanumeric run at the end of a non-followup article.
        if (not $hdr{References}
                and $hdr{__BODY__} =~ /\n{2,}[a-zA-Z0-9]{27,}\n+$/) {
            if ($hdr{Subject} =~ / [a-zA-Z0-9]{1,}$/) {
                saveart('W.mchash');
                return reject('mcbot 30+end', 'Bot signature');
            }
            saveart('W.mchash2');   # all f.p.
        }
    } # hdr{__LINES__} < 250 and not $gr{reports}

    return '';
}

# Body checks for a family of spam that advertises a fixed set of web sites.
# Only articles with fewer than 250 lines and with $gr{reports} false are
# examined.  Returns whatever reject() returns on a hit, '' otherwise.
sub local_filter_last {
    # body checks ############################################################
    if ($hdr{__LINES__} < 250 and not $gr{reports}) {
        # Warning: this check has some false positives
#       if ($hdr{Subject} =~ m#\[[^0]/[^1]\]$# and not $hdr{References}
#               and $hdr{__BODY__} =~ /\n[a-z]{12,}\n*$/
#               and $hdr{__BODY__} !~ /^begin [0-7]{3,4} /m
#               and not is_binary()     # XXX
#               ) {
#           saveart('CF.sette0');
#           return reject('7 bot', 'Bot signature');
#       }

        # I suppose I can't add new domains forever
        # (duplicate alternatives were dropped from the list; the set of
        # matched domains is unchanged)
        if (not $hdr{'X-Mailer'} and not $hdr{'X-Newsreader'}
                and not $hdr{References}
                and $hdr{__BODY__} =~ /www\.(?:pure-instinct\.com|get-some-mojo\.com|magnetizewomen\.com|makeherscream\.net|wantmoresex\.com|lovesenses\.com|sexfit\.net|enhancelibido\.net|bettersexlife\.com|erect4life\.com|androsfit\.com|smokefreelungs\.com|evidencegone\.com|biggertool\.com|forthepuss\.com|moreladies\.com|improve-libido\.com|openthathole\.com|at7x\.com|fuas\.net|dheafit\.com|sexboxoffice\.com|increasemanhood\.com|getsomeass\.com|nicotineaddict\.net|perkupsexdrive\.com|dateseverynight\.com|hot-products\.net|greatproducts\.net|landinbed\.com|getfemales\.net|sexattention\.com|allurefem\.com|smokerusa\.com|youngeryears\.com|compelthem\.net|fightimpotency\.com|drawherin\.com|invitelust\.com|youlivelonger\.com|free-cigarettes\.|cheap-cigarettes\.)/) {
#           saveart('CF.repsisdom');
            return reject('Repsis');
        }
    }

#   my $localpost = 0;
#   $localpost = 1 if $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.inwind\.it /;
#
#   if ($config{watch_cancels} and $localpost) {
#       $LocalPosts{$hdr{'Message-ID'}} = $now;
#   }

#   saveart('W.longsubj') if length $hdr{Subject} > 160;
#   saveart('W.space') if $hdr{Subject} =~ / {15,}[^ ]/;
#   saveart('W.repostnotrej')
#       if $hdr{Subject} =~ /^REPOST: / and $hdr{Path} =~ /!resurrector!/;

    return '';
}

# Checks on cancel messages.
#
# The target Message-ID is taken from $hdr{Cancel} (everything up to the last
# space is stripped).  Suspicious cancels coming from the local site (X-Trace
# matching ".visyn.net ") are only saved with saveart(); the corresponding
# reject() calls are commented out.  Cancels approved by a known rogue
# address, or with a body longer than 20 lines that does not contain a
# "Path: " line, are rejected.
# Returns whatever reject() returns on a hit, '' otherwise.
sub local_filter_cancel {
    my $localpost = 0;
    $localpost = 1 if $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.visyn\.net /;

    # Guard against a missing header so the substitution does not operate
    # on undef.
    my $id = defined $hdr{Cancel} ? $hdr{Cancel} : '';
    $id =~ s/.* //;
    return '' if not $id;

    # Look up the cancelled Message-ID itself.  The previous code passed $1,
    # which is never set by the capture-less substitution above and so held
    # either undef or a leftover from an unrelated match.
    if ($config{reject_suspect_cancels} and $localpost
            and not INN::havehist($id)) {
#       return reject('Cancel for a missing article', 'Rogue cancel');
        saveart('W.localcancelunknown');
    }

    if ($config{watch_cancels} and $localpost and not $LocalPosts{$id}) {
#       return reject('Cancel for a non local article', 'Rogue cancel');
        saveart('W.nonlocalcancel');
    }

    # Approved is usually absent; test it before matching.
    return reject('Rogue cancel (mindspring)')
        if $hdr{Approved}
            and $hdr{Approved} =~ /deputydawg\@altavista\.com/;

    if ($hdr{__LINES__} > 20 and $hdr{__BODY__} !~ /^Path: /m) {
        saveart('R.long');
        return reject('Rogue cancel (long body)', 'Rogue cancel');
    }

    return '';
}

# here I save some articles I want to check.
# Hook for inspecting rejected articles.  All the saveart() calls are
# currently commented out, so the sub only unpacks its arguments
# ($vr and $sr are presumably the verbose and short rejection reasons -
# confirm against the caller).
# There is deliberately no explicit return: the list assignment is the last
# statement evaluated and its value is what the caller receives, exactly as
# before.
sub local_filter_reject {
    my ($vr, $sr) = @_;

#   saveart('CF.local', $vr)
#       if $hdr{'X-Trace'} and $hdr{'X-Trace'} =~ /\.inwind\.it /;
#   saveart('WARN.it', $vr)
#       if $vr =~ /^NewsAgent/ and $hdr{Newsgroups} =~ /\bit\./;
#   saveart('W.supersedes') if $vr =~ /^Excessive Supersedes/;
##  saveart('CF.scoring', $vr) if $vr =~ /^Scoring filter/;
#   saveart('CF.NewsAgent', $vr) if $vr =~ /^NewsAgent/;
#   saveart('CF.SEX', $vr) if $vr =~ /^Sex spam/ and $lines < 300;
#   saveart('Z.EMP', $vr) if $vr =~ /^EMP/;
#   saveart('R.nanacancel') if $vr eq 'Cancel in forbidden group';

#   return @_;
}

# Site configuration.
#
# Fills the globals %config_local and %config_append, adds two entries to
# %Restricted_Groups and switches off the two cancel-related options in
# %config.  When $config{watch_cancels} is true, %LocalPosts is tied to the
# DBM file "$config_dir/posts".
sub local_config {
    %config_local = (
        block_late_cancels => 1,

        active_file => '/var/lib/news/active',
        statfile => '/var/log/news/cleanfeed.stats',
#       html_statfile => '/var/log/news/cleanfeed.stats.html',
        stats_interval => 300,

        do_emp_dump => 1,
        emp_dump_file => '/var/lib/news/empdump',
        debug_batch_directory => '/var/lib/news/spam',

        aggressive => 1,

        do_md5 => 1,
        md5maxmultipost => 1,
        MD5History => 11000,
        MD5maxlife => 24,
        fuzzy_md5 => 0,
        fuzzy_max_length => 700,
        md5_skips_followups => 1,
        MD5HistSize => 7000,

        do_phl => 1,
        do_fsl => 1,
        maxmultiposts => 20,
        ArticleHistory => 7000,
        EMPmaxlife => 24,
        EMPHistSize => 1500,

        maxgroups => 14,
        low_xpost_maxgroups => 6,

        block_binaries => 1,
        max_encoded_lines => 15,
        binaries_in_mod_groups => 0,
        block_mime_html => 1,
        block_html => 0,

        MIDmaxlife => 1,
        do_scoring_filter => 1,
        do_mid_filter => 1,
        do_bot_checks => 1,
        do_supersedes_filter => 1,
        check_supersedes_path => 1,

        drop_useless_controls => 1,
        drop_ihave_sendme => 1,
        drop_control_with_supersedes => 1,

        trimcycles => 500,
        EMPstarttrimming => 500,

        verbose => 1,
        logfile => undef,
        reportfile => undef,
        timer_info => 1,
        timer_interval => 300,
    );

    %config_append = (
        bin_allowed => '^visyn\.',
#       poison_groups => undef,
#       html_allowed => undef,
#       md5exclude => undef,
#       allexclude => undef,
        low_xpost_groups => '^hannover\.|fido\.ger\.',

        ### used to build domain names for URL matching
        # The value is a '|'-separated list of regexp alternatives.  Entries
        # that were listed more than once have been reduced to their first
        # occurrence (same matches, same relative order), and the trailing
        # '|' has been removed because it added an empty alternative, which
        # matches the empty string.
        'badguys' =>
            'wildchild|ilovelez|sexjunky|nymphette|stobblehouse|hard-core'.
            '|latexfetish|harddicks|\w+\.mnet1|pictureview|lasersex|sexypussy|thailady-flirt'.
            '|lick|orgasmic|malebytes|southcorp|mansclub|inet-images|pornschool'.
            '|hotsexnow|bmc-engineering|nastygirlz|marys-place|ucla\.dorms|savetrees'.
            '|(?:\w+\.)sexzilla|netzilla|jalapeno|orchidvideos|blowme|totallyfreesex'.
            '|sexplosion|unitedadultsites|rsi-net|cathouse|forbiddenphotos'.
            '|amateur-xposure|teencity|snmworld|spck|fuck|anal|xxx|ukswingers'.
            '|icl|uncc|hyperbooks|sexandpussy|fuckmenow|dalounge|eurosluts'.
            '|pornoemporium|cuntlick|vianews|knotwork|sex-e-world|pornopeople'.
            '|teenmasturbation|monkeyspanker|babelicks|ostomy|traders-update'.
            '|nasty|xnasty|hot\.pornofetish|topnotchadult|transgenderweb|sasha2'.
            '|xs4all|world-gaming|totalpussy|valleyofsex|sexy1234|thebestxxx'.
            '|sex4younow|pluckit|hardcorezone|sexymail2|teenvoyeurism|smellslikefish'.
            '|jagworld|ultimatexxx|domxxx|fetishscat|xxxlinks|redhousemedia'.
            '|freepink|yiws|xxx-citing|magicus|bsi-service|teenpussy4free|ddacsex'.
            '|adulttheatre|web-dream|misako3a|archivedpix|sexdevil|necrobabes'.
            '|foxfiles\.inter|anni|thebestxxx\.home\.ml|bigsexyland|wwwcumshots'.
            '|sexpussyanal|hotcollegesex|tomasino|uninets|arcticera|slutomatic'.
            '|landofvenus|sexyflorida|pussymuncher|midnightpassion|adulttheater'.
            '|websextv|adultdigital|toilettarts|heavyhangers|lustybbws|nicebeaver'.
            '|alternativeconnections|freerawsex|greatwebsites|real-sex|lisnet|freesex4u'.
            '|freesexpalace|freehardcore|crystalv|purehardcore|storm|anal-lover'.
            '|adultteens|cumsee|cumseeme|adultxxxfest|free4adults|arrakis|interbbs'.
            '|vixenvideo|thecyberpimps|pussycity|centre-europe|adult-sex-world'.
            '|lilbluemoon|romsnlaser|porn-station\.holowww|slickpages|stena|hyperion'.
            '|gangstergreed|mrfreefree|sexdatingpoint|SexDatingPoint|watchesprice|watches-brand'.
            '|watchesblog|hotwatch|hotwatchshop'.
            '|aecl|free-cigarettes|cheap-cigarettes|cashfiesta|joebucks',

        ### matched against domain names in URLs
        'baddomainpat' => 'free-cigarettes|cheap-cigarettes|sexdatingpoint|watchesblog|watchesprice|hotwatch|watches-brand|hotwatchshop',

        ### regexps to exempt from NNTP-Posting-Host EMP filter
#       'exempt' => undef,

        ### regexps to exempt from excessive supersedes filter
#       'supersedes_exempt' => undef,

        ### Reject cancels (and possibly supersedes) with these path elements
#       'bad_cancel_paths' => undef,

        ### refuse articles with these in the message-id (INN only)
#       'refuse_messageids' => undef,
    );

    $Restricted_Groups{netscape} = '^netscape\.';
#   $Restricted_Groups{fidoger} = '^fido\.ger\.';
    $Restricted_Groups{pfnz} = '^pfnz\.';

    $config{reject_suspect_cancels} = 0;
    $config{watch_cancels} = 0;

    if ($config{watch_cancels}) {
        # The modules are loaded at run time so that they are only needed
        # when the feature is switched on.
        eval {
            require AnyDBM_File; AnyDBM_File->import;
            require Fcntl;       Fcntl->import;
        };  # XXX ugly
        if ($@) {
            $config{watch_cancels} = undef;
            slog('E', 'Cannot load AnyDBM_File: ' . $@);
        }
        else {
            # Only attempt the tie when both modules loaded: the Fcntl
            # constants do not exist otherwise and calling them would die.
            tie(%LocalPosts, 'AnyDBM_File', "$config_dir/posts",
                Fcntl::O_CREAT() | Fcntl::O_RDWR(), 0666)
                or slog('E', "Cannot tie $config_dir/posts: $!");
        }
    }
}

print $now.$config_dir.$lines.%Restricted_Groups.%Moderated.%config_local.%config_append.@followups if 0; # lint food

1;