我判断了您传递的查询的两个示例,然后找到了公共值project_id
,并修改了第二个示例,以获得提交的project_id
和created_date
.然后,正如你所提到的,我决定将created_date
格式化为年份和月份,并将其添加为过滤器.
然后,我将这两个示例合并到一个CTE
中,只需SELECT
个所需列的名称.
最后,我只使用了ROW_NUMBER
来表示按国家/年/月列出的每种语言处理字节的最大值.
WITH ltb as(
select pl3.lang, sum(pl3.size) as total_bytes, pl3.project_id
from (
select pl2.bytes as size, pl2.language as lang, pl2.project_id
from (
select pl.language as lang, max(pl.created_at) as latest, pl.project_id as project_id
from `ghtorrent-bq.ght.project_languages` pl
join `ghtorrent-bq.ght.projects` p on p.id = pl.project_id
where p.deleted is false
and p.forked_from is null
group by lang, project_id
) pl1 join `ghtorrent-bq.ght.project_languages` pl2 on pl1.project_id = pl2.project_id
and pl1.latest = pl2.created_at
and pl1.lang = pl2.language
) pl3
group by pl3.lang, pl3.project_id
order by total_bytes desc
), fprt as(
SELECT country_code, count(*) AS NoOfCommits, c.project_id,
FORMAT_TIMESTAMP("%m", c.created_at)
AS formattedmonth,FORMAT_TIMESTAMP("%b", c.created_at)
AS formattedmonthname, FORMAT_TIMESTAMP("%Y", c.created_at)
AS formattedyear,
FROM `ghtorrent-bq.ght.commits` AS c
JOIN `ghtorrent-bq.ght.users` AS u
ON c.Committer_Id = u.id
WHERE NOT u.fake and country_code is not null
GROUP BY country_code, c.project_id, formattedmonth, formattedyear, formattedmonthname
ORDER BY NoOfCommits DESC
), almst as(
SELECT country_code,formattedmonth, formattedmonthname, formattedyear, lang, NoOfCommits, total_bytes FROM fprt JOIN ltb
on ltb.project_id=fprt.project_id
where country_code is not null
)
SELECT country_code, formattedyear as year, formattedmonthname as month, lang, NoOfCommits, total_bytes
FROM
(
SELECT *, ROW_NUMBER() OVER (PARTITION BY country_code, formattedyear, formattedmonth ORDER BY total_bytes DESC) rn
FROM almst
) t
WHERE rn = 1
ORDER BY formattedyear asc, formattedmonth asc
输出: