#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
#
# Spam sweeper for the wiki at wiki.parabolagnulinux.org.
#
# Logs in with the credentials stored in credentials.yml, walks the
# `allpages` list in batches of 200 and deletes every page whose title
# matches one of the spam patterns in @keywords (deletion reason: 'Spam').

load 'mwapi.rb'
require 'yaml'
require 'pp'

mw = MWApi.new('https://wiki.parabolagnulinux.org/api.php')
credentials = YAML.load_file('credentials.yml')
mw.login(credentials['username'], credentials['password'])

# ---------------------------------------------------------------------------
# Earlier sweeps, kept disabled for reference.
#
# Sweep 1: delete every page whose title is made of more than 9
# space-separated words.
#
# apcontinue = ''
# while not apcontinue.nil? do
#   print "Searching...\n"
#   data = mw.query(:list => :allpages,:aplimit => 200, :apcontinue => apcontinue)
#   titles = data['query']['allpages'].select{|page| page['title'].split(' ').length > 9 }.map{|page| page['title']}
#   print "Deleting #{titles.length} articles...\n"
#   if (titles.length > 0)
#     mw.delete_by_title(titles, { :reason => 'Spam' })
#   end
#   if data['query-continue'].nil?
#     apcontinue = nil
#   else
#     apcontinue = data['query-continue']['allpages']['apcontinue']
#   end
#   print "apcontinue = #{apcontinue.inspect}\n"
# end
#
# Sweep 2: delete uploaded files named like "File:<Capitalised words> <3-4 digits>.jpg".
#
# continue = ''
# while not continue.nil? do
#   print "Searching...\n"
#   data = mw.query(:list => :allimages,:ailimit => 200, :aicontinue => continue)
#   titles = data['query']['allimages'].select{|page| /^File:[A-Z].* [0-9]?[0-9][0-9][0-9]\.jpg$/ =~ page['title'] }.map{|page| page['title']}
#   print "Deleting #{titles.length} articles...\n"
#   if (titles.length > 0)
#     mw.delete_by_title(titles, { :reason => 'Spam' })
#   end
#   if data['query-continue'].nil?
#     continue = nil
#   else
#     continue = data['query-continue']['allimages']['aicontinue']
#   end
#   print "continue = #{continue.inspect}\n"
# end
# ---------------------------------------------------------------------------

# Title patterns that mark a page as spam. A page is deleted as soon as its
# title matches any one of them. All patterns are case-insensitive.
@keywords = [
  # brand names
  /(Crimson|Pink|Purple|Green|Orange) Dye/i,
  /Air Jordan/i,
  /Andrew Ting/i,
  /Beats by Dre/i,
  /Buccaneers/i,
  /Canada Goose/i,
  /Club Penguin/i,
  /Diablo 3/i,
  /Doudoune/i,
  /Gamma Blue/i,
  /Garcinia/i,
  /Jeffraham/i,
  /Jordan Fusion/i,
  /Jordan Retro/i,
  /Kate Spade/i,
  /Michael[ _]Kors/i,
  /\b49ers\b/i,
  /\bCisco 200-120\b/i,
  /\bDr\.? Dre\b/i,
  /\bGucci\b/i,
  /\bretro 11 /i,
  /\buggs?\b/i,
  /officialnflprostore/i,

  # script kiddie topics
  / on Hack Wi-Fi$/i,
  /Cracked Steam/i,
  /Psn code generator/i,
  /Steam Key Generator/i,
  /\bpc games? (free|crack)/i,
  /crack pc/i,

  # health topics
  /Resistance Band/i,
  /diabetes/i,
  /elliptical (equipment|machines?)/i,
  /fat burning/i,
  /health care/i,
  /heart (disease|attack)/i,
  /more wellness/i,
  /pilates/i,
  /skin care/i,
  /weight loss/i,
  /(body|excess) weight/i,
  /Arrhythmia/i,
  # Was the only pattern without the /i flag; made case-insensitive so it
  # behaves like every other entry in this list.
  /Cardiovascular/i,

  # sex topics
  /\b(sex|adult) cam/i,
  /\b(male|breast) enhancement\b/i,
  /\bpenis\b/i,

  # other topics
  /\b(coffee|tea) extract\b/i,
  /\b(good|quality) social media\b/i,
  /\b(green|ginseng) (coffee|tea)\b/i,
  /\b(world|globe|planet) cup\b/i,
  /\bbaby shower\b/i,
  /\bcash loan\b/i,
  /\bclick here\b/i,
  /\bcredit (check|repair)\b/i,
  /\bcredit card\b/i,
  /\bdiy l[ue]x[ue]ry\b/i,
  /\bgreen pan\b/i,
  /\bipage (web)?host/i,
  /\bmen.?s (fashion|casual wear|health)\b/i,
  /\brap beats\b/i,
  /\bsearch engine marketing\b/i,
  /\bsocial media strategy\b/i,
  /\bvigorous motivators\b/i,

  # formats
  /^(aid|assist) on (where|the place)/i,
  /^A Background In/i,
  # The previous form, /^An? ( (simple|informative))? analysis of /i, demanded
  # two consecutive spaces ("An  analysis of"), so it could not match a
  # normally spaced title. The optional adjective now carries its own leading
  # space, matching "An analysis of ..." and "A simple analysis of ...".
  /^An?( (simple|informative))? analysis of /i,
  /1st Impressions in/i,
  /The (Selection|Choice|Decision) of the .* Is Your/i,

  # unsorted
  /\b(jerseys?|vegan|shit|marketing|finance|footwear|shoes|muscle|lesbian|islamist|bodybuilding|nfl|nba|ejaculation|wholesale|nike|Hypertension|sherbet|bankrupt|stairlifts?|outfits|surcharges?)\b/i,
].freeze

# Tells whether a page title looks like spam.
#
# Reads the @keywords list assigned above on the top-level object, so it is
# meant to be called from this script's top level.
#
# title - the page title to test.
#
# Returns true when at least one pattern in @keywords matches the title,
# false otherwise.
def kw(title)
  @keywords.any? { |re| re =~ title }
end

# Walk every page in batches of 200, deleting the spam titles of each batch
# before asking for the next one. An empty :apcontinue is sent on the first
# request; the loop stops once the response carries no continuation data.
#
# NOTE(review): only the legacy 'query-continue' response key is read here.
# Newer MediaWiki releases answer with a top-level 'continue' key unless raw
# continuation is requested -- confirm what mwapi.rb sends, otherwise the
# sweep would stop after the first batch.
apcontinue = ''
until apcontinue.nil?
  puts 'Searching...'
  data = mw.query(:list => :allpages, :aplimit => 200, :apcontinue => apcontinue)

  titles = data['query']['allpages'].map { |page| page['title'] }.select { |title| kw(title) }

  puts "Deleting #{titles.length} articles..."
  mw.delete_by_title(titles, { :reason => 'Spam' }) unless titles.empty?

  continuation = data['query-continue']
  apcontinue = continuation.nil? ? nil : continuation['allpages']['apcontinue']
  puts "apcontinue = #{apcontinue.inspect}"
end