在有三个组合键来确定记录是否唯一的情况下,我想确定哪些记录在表#1中,而不在表#2中.下面的代码是我得到的最接近的代码,但是WHERE子句需要工作.
在此测试用例中,结果包括:
(2, 3, 2, null)
(4, 4, 1, null)
(7, 2, 2, null)
(8, 1, 2, null)
CREATE TABLE OldReports (
REPORT_ID int,
USER_ID int,
CLIENT_ID int,
MY_DATA varchar(100)
)
INSERT INTO OldReports
(REPORT_ID, USER_ID, CLIENT_ID, MY_DATA)
VALUES
(1, 1, 2, null),
(6, 3, 3, null),
(6, 4, 4, null),
(5, 1, 2, null),
(5, 1, 3, null),
(7, 2, 1, null),
(8, 1, 1, null)
CREATE TABLE NewReports (
REPORT_ID int,
USER_ID int,
CLIENT_ID int,
MY_DATA varchar(100)
)
INSERT INTO NewReports
(REPORT_ID, USER_ID, CLIENT_ID, MY_DATA)
VALUES
(1, 1, 2, null),
(2, 3, 2, null),
(4, 4, 1, null),
(5, 1, 2, null),
(5, 1, 3, null),
(7, 2, 2, null),
(8, 1, 2, null)
DROP TABLE IF EXISTS #ReportDifferences
SELECT DISTINCT
REPORT_ID,
USER_ID,
CLIENT_ID
FROM NewReports n
WHERE (IF NOT EXISTS (
SELECT *
FROM OldReports o
WHERE
n.REPORT_ID = o.REPORT_ID and
n.USER_ID = o.USER_ID and
n.CLIENT_ID = o.CLIENT_ID)
)
-- tell me what lives in NewReports but not in OldReports
SELECT * FROM #ReportDifferences
我try 了这个解决方案(How to select all records from one table that do not exist in another table?),但在 Big Data 集上,结果并不是相互排斥的.类似于:
LEFT JOIN OldReports o ON u.CLIENT_ID = o.CLIENT_ID AND u.REPORT_ID = o.REPORT_ID AND u.USER_ID = o.USER_ID
WHERE
u.CLIENT_ID is null
AND u.REPORT_ID is null
AND u.USER_ID is null