SQL
AخA
USE euwiki_p;
SELECT *
FROM (
SELECT
tld,
COUNT(DISTINCT page_id) AS article_count,
COUNT(*) AS total_count
FROM (
-- Extract all (page_id, tld) tuples.
SELECT
el_from AS page_id,
LOWER(CONVERT(REGEXP_REPLACE(SUBSTRING(el_index, 0, 60), '(?i)^(?:https?:|ftp:)?//([a-z]+)\\..*?$', '\\1') USING utf8)) AS tld
FROM externallinks
WHERE
-- Only links from main namespace articles.
el_from_namespace = 0
-- Get only fairly well-formed URLs for http, https, ftp or implicit https and exclude IPv4.
AND SUBSTRING(el_index, 0, 60) REGEXP '(?i)^(?:https?:|ftp:)?//[a-z]+\\..*?$'
) AS tlds
GROUP BY tld
) AS results
ORDER BY article_count DESC, total_count DESC;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.