This query is marked as a draft This query has been published by Sean.hoyland.

SQL

x
 
/*
Page intersections between a reference actor and a ban evasion source i.e. a sockmaster and their logged and/or categorized socks.
Edit counts for the ban evasion source and their sock accounts are summed.
The query gets the actor_ids for the sockmaster, accounts categorized as suspected or confirmed socks of the sockmaster
and accounts where relevant block log entries include the sockmaster. 
Inconsistencies in the way socks are logged and categorized means that the set of ban evading accounts 
for a given ban evasion source may be incomplete. The ban evasion source AndresHerutJaim is an example 
where this is the case because accounts have been assigned to different parts of the category graph.
Using log comments can, on rare occasions, result in the selection of accounts that were not operated by the sockmaster, 
if for example, the CU carried out during an SPI report for the sockmaster finds other socks not associated with the sockmaster
which are then logged as having been found as part of the sockmaster's SPI. The ban evasion source Tombah is an example where
unrelated socks, IsraPara2, שקולניק, and Wierzba were discovered during a Tombah SPI and the log 
includes Sockpuppet investigations/Tombah. So a log comment based search for Tombah socks would pick up these 3 non-Tombah accounts.
NOTE: It would seem to be more elegant and efficient to place the 3 queries that select the actor_id values
for the sockmaster and their socks in a single CTE with unions, then make a single join to the revision_userindex view, 
but this appears to cause the server to mysteriously choose an inefficient execution plan. Directly joining as a subquery
rather than using a CTE doesn't help either.
Separating the sock queries and joins to the revisions into individual CTEs, then unioning those result sets in a separate CTE,
somewhat counterintuitively appears to help the server choose a better execution plan.
*/
-- Reference account to be compared with the ban evasion set.
set @x = '184.153.21.19'; 
-- set @x = 'Comores 123';
-- set @x = 'Rajoub570';
-- sockmaster - ban evasion account operator label used in SPI/logs/category graph, not always the oldest account.
set @y = 'Epeefleche';
-- set @y = 'Judeobasquelanguage';
-- set @y = 'Owenglyndur';
-- the query - single quotes need to be escaped either by doubling or backslash
set @q = '
-- y1socks = the sockmaster
with y1socks as (
select actor_id from actor_user where actor_name = ?
),
-- y2socks = socks from the category graph, use actor view to include IPs
y2socks as (
select
a.actor_id
from
categorylinks cl
join page p on cl_from = p.page_id
join category cat on cl.cl_to = cat.cat_title
join actor_user a on a.actor_name = replace(p.page_title,''_'','' '')
where
cl.cl_to like ''%Wikipedia_sockpuppets_of_%''
and
cl.cl_to like concat(''%'',?)
),
-- y3socks = socks from the block log comments
y3socks as (
select
a.actor_id
from actor_ipblocks a
join ipblocks b on a.actor_user = b.ipb_user -- to exclude IPs - faster
-- join ipblocks b on a.actor_name = b.ipb_address -- to include IPs - slower
join comment_ipblocks c on b.ipb_reason_id = c.comment_id
where
convert(c.comment_text using utf8mb4) rlike ''(checkuser|sock|multiple accounts|evasion|proxy)''
and convert(c.comment_text using utf8mb4) like concat(''%'',?,''%'')
),
xrev as (
    select ''x'' actor, ru.rev_page, count(*) editcount
    from actor_revision a
    join revision_userindex ru on a.actor_id = ru.rev_actor and a.actor_name = ?
    group by 2
),
y1rev as (
    select ru.rev_page, count(*) editcount, 1 sockcount 
    from y1socks
    join revision_userindex ru on y1socks.actor_id = ru.rev_actor
    group by 1
),
y2rev as (
    select ru.rev_page, count(*) editcount, count(distinct ru.rev_actor) sockcount
    from y2socks
    join revision_userindex ru on y2socks.actor_id = ru.rev_actor
    group by 1
),
y3rev as (
    select ru.rev_page, count(*) editcount, count(distinct ru.rev_actor) sockcount
    from y3socks
    join revision_userindex ru on y3socks.actor_id = ru.rev_actor
    group by 1
),
yrev as (
select y, rev_page, sum(editcount) editcount, sum(sockcount) sockcount
from (
select ''y'' y, rev_page, editcount, sockcount from y1rev
union
select ''y'', rev_page, editcount, sockcount from y2rev
union
select ''y'', rev_page, editcount, sockcount from y3rev
) huh
group by 2
),
inter as (
select
    xrev.rev_page, 
    convert(replace(p.page_title,''_'','' '') using utf8mb4) page_title, 
    p.page_namespace, 
    case
    when p.page_namespace = 0 then ''(Main/Article)''
    when p.page_namespace = 1 then ''Talk''
    when p.page_namespace = 2 then ''User''
    when p.page_namespace = 3 then ''User talk''
    when p.page_namespace = 4 then ''Wikipedia''
    when p.page_namespace = 5 then ''Wikipedia talk''
    when p.page_namespace = 6 then ''File''
    when p.page_namespace = 7 then ''File talk''
    when p.page_namespace = 8 then ''MediaWiki''
    when p.page_namespace = 9 then ''MediaWiki talk''
    when p.page_namespace = 10 then ''Template''
    when p.page_namespace = 11 then ''Template talk''
    when p.page_namespace = 12 then ''Help''
    when p.page_namespace = 13 then ''Help talk''
    when p.page_namespace = 14 then ''Category''
    when p.page_namespace = 15 then ''Category talk''
    when p.page_namespace = 100 then ''Portal''
    when p.page_namespace = 101 then ''Portal talk''
    when p.page_namespace = 118 then ''Draft''
    when p.page_namespace = 119 then ''Draft talk''
    when p.page_namespace = 710 then ''TimedText''
    when p.page_namespace = 711 then ''TimedText talk''
    when p.page_namespace = 828 then ''Module''
    when p.page_namespace = 829 then ''Module talk''
    else null
    end as namespace_desc, 
--  case when p.page_is_redirect = 0 then ''No'' else ''Yes'' end page_is_redirect,
    xrev.editcount ''ref actor editcount'', 
    yrev.editcount ''other actor(s) editcount summed'',
    yrev.sockcount ''page_sock_count''
--  concat(round(100*xrev.editcount/sum(xrev.editcount) over(), 2),''%'') ''ref actor editcount (percent)'', 
--  concat(round(100*yrev.editcount/sum(yrev.editcount) over(), 2),''%'') ''other actor(s) editcount summed (percent)'',
--  round((xrev.editcount - (avg(xrev.editcount) over()))/(stddev(xrev.editcount) over()),5) ''ref actor editcount (standard score)'',
--  round((yrev.editcount - (avg(yrev.editcount) over()))/(stddev(yrev.editcount) over()),5) ''other actor(s) editcount summed (standard score)''
from xrev
join yrev on xrev.rev_page = yrev.rev_page
join page p on xrev.rev_page = p.page_id
-- order by 2,3
-- limit 10
)
select i.*, count(ru.rev_id) page_rev_count, count(distinct ru.rev_actor) page_actor_count
from inter i
join revision_userindex ru on ru.rev_page = i.rev_page
group by 1,2,3,4,5,6
';
-- build the statement, execute, deallocate
prepare s from @q;
execute s using @y, @y, @y, @x;
deallocate prepare s;
By running queries you agree to the Cloud Services Terms of Use and you irrevocably agree to release your SQL under CC0 License.
All SQL code is licensed under CC0 License.

Checking query status...