SQL
AخA
SELECT CONCAT('https://en.wikipedia.org/wiki/User:', p.page_title) AS URL
FROM enwiki_p.page p
LEFT JOIN enwiki_p.user u ON REPLACE(p.page_title, '_', ' ') = u.user_name
WHERE p.page_namespace = 2
AND u.user_id IS NULL
AND p.page_is_redirect = 0
AND NOT IS_IPV4(p.page_title) -- Exclude IPv4
AND NOT IS_IPV6(p.page_title) -- Exclude IPv6
AND p.page_title NOT LIKE '%.css'
AND p.page_title NOT LIKE '%.cs'
AND p.page_title NOT LIKE '%.js'
AND p.page_title NOT LIKE '%.charset'
AND p.page_title NOT LIKE '%.%.%.xxx'
AND p.page_title NOT IN ('Unknown_user', 'Global_rename_script', 'Sec\"\'27\\u00226\\x3ETest_(WMF)') -- Exclude technical user pages
AND p.page_id NOT IN (
SELECT cl.cl_from FROM
enwiki_p.categorylinks cl
WHERE cl.cl_to IN ('User_soft_redirects', 'Wikipedia_doppelganger_accounts', 'Wikipedia_alternative_accounts', 'Retired_Wikipedians', 'All_redirects_for_discussion') -- Exclude these categories
)
AND p.page_id NOT IN (
SELECT tl.tl_from FROM
enwiki_p.templatelinks tl
WHERE tl.tl_target_id IN (32197, 277052, 282063, 115252, 1968835) -- Exclude pages tagged with sockpuppet template
)
AND p.page_title NOT LIKE '%/%'
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.