背景
假设有一张学生成绩表
sql">create table student_scores (
id BIGSERIAL PRIMARY KEY,
name varchar,
subject varchar,
exam varchar,
score int
);
DO $DO$
DECLARE
name_var student_scores.name%type;
subject_var student_scores.subject%type;
exam_var student_scores.exam%type;
score_var student_scores.score%type;
BEGIN
FOR name_iter IN 1..10 LOOP
FOR subject_iter IN 1..10 LOOP
FOR exam_iter IN 1..10 LOOP
name_var = 'student_' || name_iter;
subject_var = 'subject_' || subject_iter;
exam_var = 'exam_' || exam_iter;
score_var = ceil(random()*40+60);
insert into student_scores(name, subject, exam, score) values (name_var, subject_var, exam_var, score_var);
END LOOP;
END LOOP;
END LOOP;
END$DO$;
获取学生在所有考试中subject_1和subject_2的平均分
使用FILTER
sql">select name,
AVG(score) FILTER (WHERE subject = 'subject_1') as avg_subject_1,
AVG(score) FILTER (WHERE subject = 'subject_2') as avg_subject_2 from student_scores group by name;
使用多一个group by
sql">select name, subject, avg(score) from student_scores where subject in ('subject_1', 'subject_2') group by name, subject;
FILTER优势
在聚合语句中,默认的数据集合是group by筛选出来的所有符合条件的数据,FILTER可以在此基础上再进一步过滤数据。这在很多时候是非常方便的。再看一个例子:获取学生在所有考试中subject_1和subject_2平均分,只考虑subject1高于65,subject2 高于70分的情况
sql">select name,
AVG(score) FILTER (WHERE subject = 'subject_1' and score > 65) as avg_subject_1,
AVG(score) FILTER (WHERE subject = 'subject_2' and score > 70) as avg_subject_2 from student_scores group by name;
是不是很方便?当然了,不使用filter,也可以通过在group by里面多加一行并且添加where条件的方式达到目的,不过没有FILTER更简洁
使用窗口函数实现topn
现在要求在exam_2这次考试中,各科排名前3的同学
sql">with ranked_students_scores_at_exam2 as (
select *, rank() over (partition by subject order by score desc) as rank, sum(1) over (partition by subject) as participants, avg(score) over (partition by subject) as avg_score from student_scores where exam = 'exam_2'
) select * from ranked_students_scores_at_exam2 where rank <= 3;
development=# with ranked_students_scores_at_exam2 as ( select *, rank() over (partition by subject order by score desc) as rank, sum(1) over (partition by subject) as particip
ants, avg(score) over (partition by subject) as avg_score from student_scores where exam = 'exam_2') select * from ranked_students_scores_at_exam2 where rank <= 3;
id | name | subject | exam | score | rank | participants | avg_score
-----+------------+------------+--------+-------+------+--------------+---------------------
2 | student_1 | subject_1 | exam_2 | 98 | 1 | 10 | 82.1000000000000000
602 | student_7 | subject_1 | exam_2 | 90 | 2 | 10 | 82.1000000000000000
502 | student_6 | subject_1 | exam_2 | 89 | 3 | 10 | 82.1000000000000000
302 | student_4 | subject_1 | exam_2 | 89 | 3 | 10 | 82.1000000000000000
292 | student_3 | subject_10 | exam_2 | 99 | 1 | 10 | 76.1000000000000000
692 | student_7 | subject_10 | exam_2 | 91 | 2 | 10 | 76.1000000000000000
592 | student_6 | subject_10 | exam_2 | 90 | 3 | 10 | 76.1000000000000000
112 | student_2 | subject_2 | exam_2 | 99 | 1 | 10 | 79.8000000000000000
412 | student_5 | subject_2 | exam_2 | 93 | 2 | 10 | 79.8000000000000000
212 | student_3 | subject_2 | exam_2 | 86 | 3 | 10 | 79.8000000000000000
522 | student_6 | subject_3 | exam_2 | 90 | 1 | 10 | 70.6000000000000000
322 | student_4 | subject_3 | exam_2 | 83 | 2 | 10 | 70.6000000000000000
122 | student_2 | subject_3 | exam_2 | 75 | 3 | 10 | 70.6000000000000000
532 | student_6 | subject_4 | exam_2 | 100 | 1 | 10 | 84.5000000000000000
132 | student_2 | subject_4 | exam_2 | 100 | 1 | 10 | 84.5000000000000000
232 | student_3 | subject_4 | exam_2 | 99 | 3 | 10 | 84.5000000000000000
842 | student_9 | subject_5 | exam_2 | 91 | 1 | 10 | 80.1000000000000000
342 | student_4 | subject_5 | exam_2 | 89 | 2 | 10 | 80.1000000000000000
642 | student_7 | subject_5 | exam_2 | 87 | 3 | 10 | 80.1000000000000000
552 | student_6 | subject_6 | exam_2 | 98 | 1 | 10 | 76.8000000000000000
152 | student_2 | subject_6 | exam_2 | 95 | 2 | 10 | 76.8000000000000000
52 | student_1 | subject_6 | exam_2 | 88 | 3 | 10 | 76.8000000000000000
762 | student_8 | subject_7 | exam_2 | 97 | 1 | 10 | 82.9000000000000000
662 | student_7 | subject_7 | exam_2 | 95 | 2 | 10 | 82.9000000000000000
362 | student_4 | subject_7 | exam_2 | 95 | 2 | 10 | 82.9000000000000000
872 | student_9 | subject_8 | exam_2 | 98 | 1 | 10 | 81.1000000000000000
672 | student_7 | subject_8 | exam_2 | 96 | 2 | 10 | 81.1000000000000000
772 | student_8 | subject_8 | exam_2 | 89 | 3 | 10 | 81.1000000000000000
382 | student_4 | subject_9 | exam_2 | 92 | 1 | 10 | 77.6000000000000000
982 | student_10 | subject_9 | exam_2 | 91 | 2 | 10 | 77.6000000000000000
782 | student_8 | subject_9 | exam_2 | 82 | 3 | 10 | 77.6000000000000000
(31 行记录)
这里,仅仅一次查询,就返回了第二次考试中各科目排名前三的同学,以及该科目的平均分数和参与人数。非常的方便。