Fork of Wikimedia incubator stats by KCVelaga
This query is marked as a draft This query has been published by CMyrick-WMF.

SQL

x
 
WITH base_table AS (
  SELECT DISTINCT 
    rev_id, 
    rev_actor, 
    rev_timestamp, 
    rev_len, 
    rev_parent_id,
    page_namespace, 
    page_id, 
    actor_id,
    REGEXP_SUBSTR(page_title, 'W[a-z]/[a-z]+') AS prefix,
    rc_new_len - rc_old_len AS byte_diff
  FROM 
    revision rev
  JOIN 
    page 
    ON rev.rev_page = page.page_id
  JOIN 
    actor
    ON rev.rev_actor = actor.actor_id
  LEFT OUTER JOIN 
    recentchanges rc 
    ON rev.rev_id = rc.rc_this_oldid
  JOIN
    user
    ON actor.actor_id = user.user_id
  WHERE 
    page_namespace IN (0, 1, 10, 11, 14, 15, 828, 829) AND
    -- explicity remove admins who often make edits across multiple incubating projects
    NOT user_name IN ('MF-Warburg', 'Jon Harald Søby', 'Minorax')
  HAVING 
    prefix <> ''
  ORDER BY 
    rev_id DESC),
  primary_metrics AS (
    SELECT 
        prefix, 
        SUM(CASE WHEN byte_diff < 0 THEN byte_diff ELSE 0 END) AS bytes_removed_30D,
        SUM(CASE WHEN byte_diff >= 0 THEN byte_diff ELSE 0 END) AS bytes_added_30D,
        COUNT(DISTINCT actor_id) AS actor_count,
        COUNT(DISTINCT rev_id) AS edit_count,
        COUNT(DISTINCT page_id) AS pages_count
    FROM 
        base_table
    GROUP BY 
        prefix),
  
  last_3M AS (
    SELECT 
        *, 
        YEAR(rev_timestamp) AS year, 
        MONTH(rev_timestamp) AS month
    FROM 
        base_table
    WHERE 
        MONTH(rev_timestamp) >= MONTH(NOW()) - 3 AND 
        MONTH(rev_timestamp) < MONTH(NOW()) AND
        YEAR(rev_timestamp) = YEAR(NOW())),
  
  monthly_grouping_3M AS (
    SELECT 
        prefix, 
        month, 
        COUNT(DISTINCT rev_id) AS edits_by_month,
        COUNT(DISTINCT actor_id) AS editors_by_month
    FROM 
        last_3M
    GROUP BY 
        prefix, 
        month),
    
  avg_table_3M AS (
    SELECT 
        prefix, 
        AVG(edits_by_month) AS avg_edits_3M, 
        AVG(editors_by_month) AS avg_editors_3M
    FROM 
        monthly_grouping_3M
    GROUP BY
        prefix)
SELECT 
    pm.prefix, 
    edit_count, 
    actor_count,
    bytes_added_30D, 
    bytes_removed_30D, 
    pages_count, 
    CAST(avg_edits_3M AS INT) AS avg_edits_3M, 
    CAST(avg_editors_3M AS INT) AS avg_editors_3M
FROM 
    primary_metrics pm
    JOIN avg_table_3M avg_3M ON avg_3M.prefix = pm.prefix
ORDER BY 
    avg_editors_3M DESC
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...