This query has been published by Tulsi Bhagat.
import time

import requests
from IPython import display
import pywikibot
from pywikibot import pagegenerators


def get_sitematrix():
    """Request the sitematrix from the API, check if open, then yield URLs."""

    def check_status(checksite):
        """Return True only if the wiki is public and open."""
        return ((checksite.get('closed') is None) and
                (checksite.get('private') is None) and
                (checksite.get('fishbowl') is None))

    payload = {"action": "sitematrix",
               "format": "json",
               "smlangprop": "site",
               "smsiteprop": "url"}
    headers = {'user-agent': 'HijackSpam on PAWS; User:AntiCompositeBot, '
               'pywikibot/' + pywikibot.__version__}
    url = 'https://meta.wikimedia.org/w/api.php'
    r = requests.get(url, headers=headers, params=payload)
    r.raise_for_status()
    result = r.json()['sitematrix']

    # The sitematrix groups wikis by language, except for the 'count' total
    # and the 'specials' key, which holds a flat list of sites.
    for key, lang in result.items():
        if key == 'count':
            continue
        elif key == 'specials':
            for site in lang:
                if check_status(site):
                    yield site['url']
        else:
            for site in lang['site']:
                if check_status(site):
                    yield site['url']


def list_pages(site, target):
    """Take a pywikibot site object and yield the pages linking to the target."""
    # Special:LinkSearch indexes http and https links separately, and a
    # leading '*.' is needed to match subdomains, so run four searches.
    for num in range(0, 4):
        if num % 2 == 0:
            protocol = 'http'
        else:
            protocol = 'https'
        if num > 1:
            ctar = '*.' + target
        else:
            ctar = target
        for page in pagegenerators.LinksearchPageGenerator(
                ctar, site=site, protocol=protocol):
            yield page


def output(text):
    display.display_markdown(text, raw=True)


def prep_markdown(pages, site):
    """Render one wiki's results as a Markdown list; return the link count."""
    md = ''
    count = 0
    for page in pages:
        count += 1
        md += f'* [{page.title()}]({page.full_url()})\n'
    if count > 0:
        md = f'## {site.dbName()}: {count}\n' + md
        output(md)
    return count


def summary_table(counts):
    """Output a Markdown summary plus copy-pasteable wikitext."""
    md = '## Summary\n\n|Wiki|Count|\n|---|---|\n'
    wt = '{| class="wikitable"\n|-\n! Wiki !! Count !! Volunteer !! Progress\n'
    for wiki, count in counts.items():
        if count is not None:
            md += f'|{wiki}|{count}|\n'
            wt += f'|-\n| {wiki} || {count} || || \n'
    wt += '|}'
    md += '\n```\n' + wt + '\n```\n'
    output(md)


def main():
    target = 'blackwell-synergy.com'
    counts = {}
    try:
        # get_sitematrix() is a generator, so materialize it here to make
        # the HTTP request happen inside the try block.
        sitematrix = list(get_sitematrix())
    except requests.exceptions.RequestException:
        time.sleep(5)
        sitematrix = list(get_sitematrix())

    output('Scanning all public wikis for ' + target + ' at '
           + time.asctime() + '\n')

    for url in sitematrix:
        try:
            # Construct a Site object from any page URL on the wiki.
            cur_site = pywikibot.Site(url=url + '/wiki/MediaWiki:Delete/en')
        except Exception:
            output('Skipping ' + url)
            continue
        pages = list_pages(cur_site, target)
        counts[cur_site.dbName()] = prep_markdown(pages, cur_site)

    summary_table(counts)
    output('Finished')


main()
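The core primitive of the scan is Special:LinkSearch, reached through pywikibot's pagegenerators.LinksearchPageGenerator. A minimal single-wiki sketch, assuming a working pywikibot user-config; the domain example.com and the total=10 cap are illustrative values, not part of the query above:

import pywikibot
from pywikibot import pagegenerators

# List up to ten pages on English Wikipedia linking to example.com
# (domain and cap are hypothetical, chosen only for this sketch).
site = pywikibot.Site('en', 'wikipedia')
for page in pagegenerators.LinksearchPageGenerator(
        'example.com', site=site, total=10):
    print(page.title())

Each such search covers one URL pattern at a time, which is why list_pages() above runs four searches per wiki: http and https, each with and without the '*.' subdomain wildcard.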