SQL
x
USE skwiki_p;
-- Log2 bins
SELECT
FLOOR(LOG2(p.page_len)) as size_bucket,
POW(2, FLOOR(LOG2(p.page_len))) as size_from,
POW(2, FLOOR(LOG2(p.page_len)) + 1) - 1 as size_to,
COUNT(*) as frequency
FROM
page p
WHERE
p.page_namespace = 0 and
p.page_is_redirect = 0
GROUP BY
size_bucket
ORDER BY
size_bucket
;
-- Log10 bins
SELECT
FLOOR(LOG10(p.page_len)) as size_bucket,
POW(10, FLOOR(LOG10(p.page_len))) as size_from,
POW(10, FLOOR(LOG10(p.page_len)) + 1) - 1 as size_to,
COUNT(*) as frequency
FROM
page p
WHERE
p.page_namespace = 0 and
p.page_is_redirect = 0
GROUP BY
size_bucket
ORDER BY
size_bucket
;
-- Linear 1000 B bins
SELECT
FLOOR(p.page_len / 1000) as size_bucket,
FLOOR(p.page_len / 1000) * 1000 as size_from,
(FLOOR(p.page_len / 1000) + 1) * 1000 - 1 as size_to,
COUNT(*) as frequency
FROM
page p
WHERE
p.page_namespace = 0 and
p.page_is_redirect = 0
GROUP BY
size_bucket
ORDER BY
size_bucket
;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.