HiveSQL分组排序取topN
库名:db_1
表名:table1
列表:cols4,cols5,cols6,cols2
含义:根据列cols4分组,根据列cols2倒序,取每组第一条数据。
select t2.cols4,t2.cols5,t2.cols6,t2.cols2
from (
select t1.cols4,t1.cols5,t1.cols6,t1.cols2,
row_number() over(partition by t1.cols4 order by t1.cols2 desc) rk
from (
select cols4,cols5,cols6,cols2
from db_1.table1
where cols1='202011' and cols2='20201112' and length(cols3)>1
) t1
) t2 where t2.rk=1 ;
使用(if / when-then-else)
select t1.citycode,t1.data1,t1.data2,t1.data3,t1.allData,
if(t1.allData==0,0,round(t1.data1 * 100 / t1.allData, 2)) as data1Rate
from (
select citycode,
count(distinct case when col1='data1' then col2 else null end) as data1,
count(distinct case when col1='data1' and col3='data2' then col2 else null end) as data2,
count(distinct case when col1='data1' or col3='data3' then col2 else null end) as data3,
count(distinct col2) as allData
from db1.table1 where col4='data4'
group by citycode
) t1
一列变成多列多行:lateral view explode + split
select t1.col1,t1.col2,t1.col3,t1.col4,t1.col5,
t1.subinfo[4] as c1,
t1.subinfo[1] as c2,
t1.subinfo[0] as c3,
t1.subinfo[2] as c4,
t1.subinfo[3] as c5,
t1.subinfo[5] as c6,
t1.subinfo[6] as c7,
t1.subinfo[7] as c8,
t1.subinfo[8] as c9,
t1.subinfo[9] as c10,
t1.subinfo[10] as c11,
t1.subinfo[11] as c12,
t1.subinfo[12] as c13,t1.col6
from(
select t.col1,t.col2,t.col3,t.col4,t.col5,t.col6,split(t.sublist,'\\\\|') as subinfo
from (
select t2.col1,t2.col2,t2.col3,t2.col4,t2.col5,t2.col6,sublist
from (
select col1,col2,col3,col4,col5,col6,datalist
from db1.table1 where col1='data1' and col2='data2'
) t2 lateral view explode(split(t2.datalist,',')) test_sub as sublist
) t
) t1;