diff options
Diffstat (limited to 'scratch.rb')
-rwxr-xr-x | scratch.rb | 149 |
1 files changed, 149 insertions, 0 deletions
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Spam clean-up scratch script (scratch.rb).
#
# Logs in to the wiki API at the URL below using the username/password read
# from credentials.yml, then walks list=allpages in batches of 200 and
# deletes, with reason 'Spam', every page whose title matches one of the
# patterns in @keywords.
#
# NOTE(review): MWApi (#login, #query, #delete_by_title) comes from mwapi.rb,
# which is not visible here; the response shapes used below are inferred from
# how this script indexes into them.

load 'mwapi.rb'
require 'yaml'
require 'pp'

mw = MWApi.new('https://wiki.parabolagnulinux.org/api.php')
credentials = YAML.load_file('credentials.yml')
mw.login(credentials['username'], credentials['password'])

# Disabled earlier pass: delete every page whose title splits into more than
# nine space-separated words.
#
# apcontinue = ''
# while not apcontinue.nil? do
#   print "Searching...\n"
#   data = mw.query(:list => :allpages,:aplimit => 200, :apcontinue => apcontinue)
#   titles = data['query']['allpages'].select{|page| page['title'].split(' ').length > 9 }.map{|page| page['title']}
#   print "Deleting #{titles.length} articles...\n"
#   if (titles.length > 0)
#     mw.delete_by_title(titles, { :reason => 'Spam' })
#   end
#   if data['query-continue'].nil?
#     apcontinue = nil
#   else
#     apcontinue = data['query-continue']['allpages']['apcontinue']
#   end
#   print "apcontinue = #{apcontinue.inspect}\n"
# end

# Disabled earlier pass: delete images whose title looks like
# "File:<Capital>... <2-4 digits>.jpg".
#
# continue = ''
# while not continue.nil? do
#   print "Searching...\n"
#   data = mw.query(:list => :allimages,:ailimit => 200, :aicontinue => continue)
#   titles = data['query']['allimages'].select{|page| /^File:[A-Z].* [0-9]?[0-9][0-9][0-9]\.jpg$/ =~ page['title'] }.map{|page| page['title']}
#   print "Deleting #{titles.length} articles...\n"
#   if (titles.length > 0)
#     mw.delete_by_title(titles, { :reason => 'Spam' })
#   end
#   if data['query-continue'].nil?
#     continue = nil
#   else
#     continue = data['query-continue']['allimages']['aicontinue']
#   end
#   print "continue = #{continue.inspect}\n"
# end


# Title patterns treated as spam. A page is deleted when ANY pattern matches.
# Kept as a top-level instance variable (it lives on the `main` object) so the
# top-level method #kw below can read it.
@keywords = [
  # brand names
  /(Crimson|Pink|Purple|Green|Orange) Dye/i,
  /Air Jordan/i,
  /Andrew Ting/i,
  /Beats by Dre/i,
  /Buccaneers/i,
  /Canada Goose/i,
  /Club Penguin/i,
  /Diablo 3/i,
  /Doudoune/i,
  /Gamma Blue/i,
  /Garcinia/i,
  /Jeffraham/i,
  /Jordan Fusion/i,
  /Jordan Retro/i,
  /Kate Spade/i,
  /Michael[ _]Kors/i,
  /\b49ers\b/i,
  /\bCisco 200-120\b/i,
  /\bDr\.? Dre\b/i,
  /\bGucci\b/i,
  /\bretro 11 /i,
  /\buggs?\b/i,
  /officialnflprostore/i,
  # script kiddie topics
  / on Hack Wi-Fi$/i,
  /Cracked Steam/i,
  /Psn code generator/i,
  /Steam Key Generator/i,
  /\bpc games? (free|crack)/i,
  /crack pc/i,
  # health topics
  /Resistance Band/i,
  /diabetes/i,
  /elliptical (equipment|machines?)/i,
  /fat burning/i,
  /health care/i,
  /heart (disease|attack)/i,
  /more wellness/i,
  /pilates/i,
  /skin care/i,
  /weight loss/i,
  /(body|excess) weight/i,
  /Arrhythmia/i,
  # Case-insensitive like every other pattern in this list (the flag was
  # missing before).
  /Cardiovascular/i,
  # sex topics
  /\b(sex|adult) cam/i,
  /\b(male|breast) enhancement\b/i,
  /\bpenis\b/i,
  # other topics
  /\b(coffee|tea) extract\b/i,
  /\b(good|quality) social media\b/i,
  /\b(green|ginseng) (coffee|tea)\b/i,
  /\b(world|globe|planet) cup\b/i,
  /\bbaby shower\b/i,
  /\bcash loan\b/i,
  /\bclick here\b/i,
  /\bcredit (check|repair)\b/i,
  /\bcredit card\b/i,
  /\bdiy l[ue]x[ue]ry\b/i,
  /\bgreen pan\b/i,
  /\bipage (web)?host/i,
  /\bmen.?s (fashion|casual wear|health)\b/i,
  /\brap beats\b/i,
  /\bsearch engine marketing\b/i,
  /\bsocial media strategy\b/i,
  /\bvigorous motivators\b/i,
  # formats
  /^(aid|assist) on (where|the place)/i,
  /^A Background In/i,
  # Matches "A/An analysis of " and "A/An simple|informative analysis of ".
  # The previous form required two consecutive spaces after "A"/"An", so
  # normally spaced titles never matched.
  /^An? ((simple|informative) )?analysis of /i,
  /1st Impressions in/i,
  /The (Selection|Choice|Decision) of the .* Is Your/i,
  # unsorted
  /\b(jerseys?|vegan|shit|marketing|finance|footwear|shoes|muscle|lesbian|islamist|bodybuilding|nfl|nba|ejaculation|wholesale|nike|Hypertension|sherbet|bankrupt|stairlifts?|outfits|surcharges?)\b/i,
].freeze

# Tell whether a page title looks like spam.
#
# @param title [String] page title to test
# @return [Boolean] true when at least one pattern in @keywords matches
def kw(title)
  @keywords.any? { |re| re =~ title }
end

# Pull the list=allpages resume token out of a query response.
#
# Understands both continuation layouts a MediaWiki API can return: the
# top-level "continue" object and the legacy "query-continue" object (the only
# one this script used to read).
#
# @param data [Hash] parsed response as returned by mw.query
# @return [String, nil] value to send as :apcontinue next, or nil when the
#   response carries no continuation (i.e. the listing is finished)
def next_apcontinue(data)
  modern = data['continue']
  return modern['apcontinue'] if modern && modern['apcontinue']

  legacy = data['query-continue']
  legacy && legacy['allpages'] && legacy['allpages']['apcontinue']
end

# Main pass: page through every article, deleting the ones with spammy titles.
apcontinue = ''
until apcontinue.nil?
  print "Searching...\n"
  data = mw.query(:list => :allpages, :aplimit => 200, :apcontinue => apcontinue)
  titles = data['query']['allpages'].map { |page| page['title'] }.select { |title| kw(title) }
  print "Deleting #{titles.length} articles...\n"
  # Skip the delete call entirely when this batch had no matches.
  mw.delete_by_title(titles, { :reason => 'Spam' }) unless titles.empty?
  apcontinue = next_apcontinue(data)
  print "apcontinue = #{apcontinue.inspect}\n"
end