This query is marked as a draft This query has been published by CMyrick-WMF.

SQL

x
 
WITH base_table AS (
  SELECT DISTINCT 
    rev_id, rev_actor, rev_timestamp, rev_len, rev_parent_id,
    page_namespace, page_id, actor_id,
    REGEXP_SUBSTR(page_title, 'W[a-z]/[a-z]+') AS prefix,
    rc_new_len - rc_old_len AS byte_diff
  FROM revision rev
  JOIN page ON rev.rev_page = page.page_id
  JOIN actor ON rev.rev_actor = actor.actor_id
  LEFT OUTER JOIN recentchanges rc ON rev.rev_id = rc.rc_this_oldid
  WHERE page_namespace IN (0, 1, 10, 11, 14, 15, 828, 829)
  HAVING prefix <> ''
  ORDER BY rev_id DESC),
  
  
  since_2019 AS (
    SELECT *, YEAR(rev_timestamp) AS year, MONTH(rev_timestamp) AS month
    FROM base_table
    WHERE YEAR(rev_timestamp) >= 2019),
  
  monthly_grouping AS (
    SELECT prefix, month, 
        COUNT(DISTINCT rev_id) AS edits_by_month,
        COUNT(DISTINCT actor_id) AS editors_by_month
    FROM since_2019
    GROUP BY prefix, month),
    
  avg_table AS (
    SELECT prefix, 
        AVG(edits_by_month) AS avg_edits_since_2019, 
        AVG(editors_by_month) AS avg_editors_since_2019
    FROM monthly_grouping
    GROUP BY prefix),
    
  first_edit AS (
    SELECT prefix,
           MIN(YEAR(rev_timestamp)) AS first_edit
    FROM base_table
    GROUP BY prefix)
SELECT prefix,
       CAST(avg_edits_since_2019 AS INT) AS avg_edits_since_2019, 
       CAST(avg_editors_since_2019 AS INT) AS avg_editors_since_2019
FROM avg_table a
JOIN first_edit f ON a.prefix = f.prefix
WHERE a.avg_editors_since_2019 <= 2 AND
      a.avg_edits_since_2019 <= 5 AND
      f.first_edit <= 2018
ORDER BY a.avg_editors_since_2019, a.avg_edits_since_2019 
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...