场景2:“网站访客数量统计”
通过下面的示例说明hll如何统计在一段时间内访问网站的不同用户数量:
-- 创建原始数据表,表示某个用户在某个时间访问过网站。
openGauss=# create table facts (
date date,
user_id integer
);
-- 构造数据,表示一天中有哪些用户访问过网站。
openGauss=# insert into facts values ('2019-02-20', generate_series(1,100));
openGauss=# insert into facts values ('2019-02-21', generate_series(1,200));
openGauss=# insert into facts values ('2019-02-22', generate_series(1,300));
openGauss=# insert into facts values ('2019-02-23', generate_series(1,400));
openGauss=# insert into facts values ('2019-02-24', generate_series(1,500));
openGauss=# insert into facts values ('2019-02-25', generate_series(1,600));
openGauss=# insert into facts values ('2019-02-26', generate_series(1,700));
openGauss=# insert into facts values ('2019-02-27', generate_series(1,800));
-- 创建表并指定列为hll。
openGauss=# create table daily_uniques (
date date UNIQUE,
users hll
);
-- 根据日期把数据分组,并把数据插入到hll中。
openGauss=# insert into daily_uniques(date, users)
select date, hll_add_agg(hll_hash_integer(user_id))
from facts
group by 1;
-- 计算每一天访问网站不同用户数量
openGauss=# select date, hll_cardinality(users) from daily_uniques order by date;
date | hll_cardinality
------------+------------------
2019-02-20 | 100
2019-02-21 | 200.217913059312
2019-02-22 | 301.76494508014
2019-02-23 | 400.862858326446
2019-02-24 | 502.626933349694
2019-02-25 | 601.922606454213
2019-02-26 | 696.602316769498
2019-02-27 | 798.111731634412
(8 rows)
-- 计算在2019.02.20到2019.02.26一周中有多少不同用户访问过网站
openGauss=# select hll_cardinality(hll_union_agg(users)) from daily_uniques where date >= '2019-02-20'::date and date <= '2019-02-26'::date;
hll_cardinality
------------------
696.602316769498
(1 row)
-- 计算昨天访问过网站而今天没访问网站的用户数量。
openGauss=# SELECT date, (#hll_union_agg(users) OVER two_days) - #users AS lost_uniques FROM daily_uniques WINDOW two_days AS (ORDER BY date ASC ROWS 1 PRECEDING);
date | lost_uniques
------------+--------------
2019-02-20 | 0
2019-02-21 | 0
2019-02-22 | 0
2019-02-23 | 0
2019-02-24 | 0
2019-02-25 | 0
2019-02-26 | 0
2019-02-27 | 0
(8 rows)
-- 删除表
openGauss=# drop table facts;
openGauss=# drop table daily_uniques;
详情查看:https://opengauss.org
详情查看:https://docs-opengauss.osinfra.cn
特别声明:以上内容(如有图片或视频亦包括在内)为自媒体平台“网易号”用户上传并发布,本平台仅提供信息存储服务。
Notice: The content above (including the pictures and videos if any) is uploaded and posted by a user of NetEase Hao, which is a social media platform and only provides information storage services.