This query is marked as a draft This query has been published by Krinkle.

SQL

x
 
-- Step 1: Get page ID
-- https://www.mediawiki.org/w/index.php?title=Project:Sandbox&action=history
-- page_id: 1428
-- Step 2: Get most common rev_sha1 of each year (i.e. the one we revert/rollback/undo toward)
/*
SELECT
    COUNT(*) _count,
    SUBSTR(rev_timestamp, 1, 4) _year,
    rev_sha1
FROM revision
WHERE rev_page=1428
GROUP BY _year, rev_sha1
ORDER BY _count DESC
LIMIT 100;
*/
/*
_year, rev_sha1
2011-2025, ni8p3ypkq39hxrmit1g3300saroa1mk (common action: reset to sandbox template)
........., phoiac9h4m842xq45sp7s6u21eteeq1 (common revision content: blank)
*/
-- Step 3: How common is this content, within this page, overall?
/*
SELECT COUNT(*) _count FROM revision
  WHERE rev_page=1428
  AND rev_sha1='ni8p3ypkq39hxrmit1g3300saroa1mk';
*/
/*
_count
15,583 revision IDs
*/
-- Step 4: How common is content re-use?
/*
SELECT COUNT(*) _count, slot_content_id FROM slots
WHERE slot_revision_id IN (
  SELECT rev_id
  FROM revision
  WHERE rev_page=1428
  AND rev_sha1='ni8p3ypkq39hxrmit1g3300saroa1mk'
  ORDER BY rev_timestamp DESC
)
GROUP BY slot_content_id
HAVING _count > 1
ORDER BY _count DESC;
*/
/*
70 rows
_count DESC, slot_content_id
5,      6577768
5,      6572543
4,      5925083
...
_count, slot_content_id ASC
2,      1913184
2,      1932853
2,      1932892
...
*/
-- Step 5: Obtain sample revisions of the most common ones
/*
SELECT slot_revision_id, slot_content_id
FROM slots
WHERE slot_content_id IN (6577768, 6572543, 5925083)
ORDER BY slot_revision_id ASC;
*/
-- Step 6: Obtain sample revisions of the oldest ones
/*
SELECT slot_revision_id, slot_content_id
FROM slots
WHERE slot_content_id IN (1913184, 1932853, 1932892)
ORDER BY slot_revision_id ASC;
*/
/*
slot_revision_id, slot_content_id
2221200, 1913184
2221206, 1913184
2243131, 1932853
2243149, 1932853
.., ..
*/
-- Step 7: Alternative way to obtain oldest examples of reuse
--         because the above way assumes content_id was assigned chronologically,
--         which it might not be, given it was new in MCR, so look at content_address instead.
--         (This query uses testwiki instead of mediawikiwiki, for a smaller database)
/*
SELECT COUNT(*) _count, content_sha1, content_address
FROM content
GROUP BY content_sha1
HAVING _count > 1
ORDER BY content_address ASC
LIMIT 10;
*/
/*
_count, content_sha1, content_address
2, ar73lz4ja7o6ni8b0bh47owfdjyxv7c, es:DB://cluster10/1000?flags=utf-8,gzip
2, 6rmg5p060238btnqdcn11oc7stfuarz, es:DB://cluster10/1009?flags=utf-8,gzip
*/
SELECT slot_revision_id FROM slots WHERE slot_content_id IN (
  SELECT content_id FROM content WHERE content_sha1 IN ('ar73lz4ja7o6ni8b0bh47owfdjyxv7c', '6rmg5p060238btnqdcn11oc7stfuarz')
);
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...