SQL
x
-- Step 1: Get page ID
-- https://www.mediawiki.org/w/index.php?title=Project:Sandbox&action=history
-- page_id: 1428
-- Step 2: Get most common rev_sha1 of each year (i.e. the one we revert/rollback/undo toward)
/*
SELECT
COUNT(*) _count,
SUBSTR(rev_timestamp, 1, 4) _year,
rev_sha1
FROM revision
WHERE rev_page=1428
GROUP BY _year, rev_sha1
ORDER BY _count DESC
LIMIT 100;
*/
/*
_year, rev_sha1
2011-2025, ni8p3ypkq39hxrmit1g3300saroa1mk (common action: reset to sandbox template)
........., phoiac9h4m842xq45sp7s6u21eteeq1 (common revision content: blank)
*/
-- Step 3: How common is this content, within this page, overall?
/*
SELECT COUNT(*) _count FROM revision
WHERE rev_page=1428
AND rev_sha1='ni8p3ypkq39hxrmit1g3300saroa1mk';
*/
/*
_count
15,583 revision IDs
*/
-- Step 4: How common is content re-use?
/*
SELECT COUNT(*) _count, slot_content_id FROM slots
WHERE slot_revision_id IN (
SELECT rev_id
FROM revision
WHERE rev_page=1428
AND rev_sha1='ni8p3ypkq39hxrmit1g3300saroa1mk'
ORDER BY rev_timestamp DESC
)
GROUP BY slot_content_id
HAVING _count > 1
ORDER BY _count DESC;
*/
/*
70 rows
_count DESC, slot_content_id
5, 6577768
5, 6572543
4, 5925083
...
_count, slot_content_id ASC
2, 1913184
2, 1932853
2, 1932892
...
*/
-- Step 5: Obtain sample revisions of the most common ones
/*
SELECT slot_revision_id, slot_content_id
FROM slots
WHERE slot_content_id IN (6577768, 6572543, 5925083)
ORDER BY slot_revision_id ASC;
*/
-- Step 6: Obtain sample revisions of the oldest ones
/*
SELECT slot_revision_id, slot_content_id
FROM slots
WHERE slot_content_id IN (1913184, 1932853, 1932892)
ORDER BY slot_revision_id ASC;
*/
/*
slot_revision_id, slot_content_id
2221200, 1913184
2221206, 1913184
2243131, 1932853
2243149, 1932853
.., ..
*/
-- Step 7: Alternative way to obtain oldest examples of reuse
-- because the above way assumes content_id was assigned chronologically,
-- which it might not be, given it was new in MCR, so look at content_address instead.
-- (This query uses testwiki instead of mediawikiwiki, for a smaller database)
/*
SELECT COUNT(*) _count, content_sha1, content_address
FROM content
GROUP BY content_sha1
HAVING _count > 1
ORDER BY content_address ASC
LIMIT 10;
*/
/*
_count, content_sha1, content_address
2, ar73lz4ja7o6ni8b0bh47owfdjyxv7c, es:DB://cluster10/1000?flags=utf-8,gzip
2, 6rmg5p060238btnqdcn11oc7stfuarz, es:DB://cluster10/1009?flags=utf-8,gzip
*/
SELECT slot_revision_id FROM slots WHERE slot_content_id IN (
SELECT content_id FROM content WHERE content_sha1 IN ('ar73lz4ja7o6ni8b0bh47owfdjyxv7c', '6rmg5p060238btnqdcn11oc7stfuarz')
);
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.