SQL
x
WITH base_table AS (
SELECT DISTINCT
rev_id, rev_actor, rev_timestamp, rev_len, rev_parent_id,
page_namespace, page_id, actor_id,
REGEXP_SUBSTR(page_title, 'W[a-z]/[a-z]+') AS prefix,
rc_new_len - rc_old_len AS byte_diff
FROM revision rev
JOIN page ON rev.rev_page = page.page_id
JOIN actor ON rev.rev_actor = actor.actor_id
LEFT OUTER JOIN recentchanges rc ON rev.rev_id = rc.rc_this_oldid
WHERE page_namespace IN (0, 1, 10, 11, 14, 15, 828, 829)
HAVING prefix <> ''
ORDER BY rev_id DESC),
since_2019 AS (
SELECT *, YEAR(rev_timestamp) AS year, MONTH(rev_timestamp) AS month
FROM base_table
WHERE YEAR(rev_timestamp) >= 2019),
monthly_grouping AS (
SELECT prefix, month,
COUNT(DISTINCT rev_id) AS edits_by_month,
COUNT(DISTINCT actor_id) AS editors_by_month
FROM since_2019
GROUP BY prefix, month),
avg_table AS (
SELECT prefix,
AVG(edits_by_month) AS avg_edits_since_2019,
AVG(editors_by_month) AS avg_editors_since_2019
FROM monthly_grouping
GROUP BY prefix),
first_edit AS (
SELECT prefix,
MIN(YEAR(rev_timestamp)) AS first_edit
FROM base_table
GROUP BY prefix)
SELECT prefix,
CAST(avg_edits_since_2019 AS INT) AS avg_edits_since_2019,
CAST(avg_editors_since_2019 AS INT) AS avg_editors_since_2019
FROM avg_table a
JOIN first_edit f ON a.prefix = f.prefix
WHERE a.avg_editors_since_2019 <= 2 AND
a.avg_edits_since_2019 <= 5 AND
f.first_edit <= 2018
ORDER BY a.avg_editors_since_2019, a.avg_edits_since_2019
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.