目前企业应用系统使用的大多数据库都是关系型数据库,关系数据库依赖的理论就是针对集合运算的关系代数。关系代数是一种抽象的查询语言,是关系数据操纵语言的一种传统表达方式。不过我们在工作中发现,很多人在面对复杂的数据库运算逻辑时会采用游标、循环、自定义函数等方式处理,因为游标是一种比较熟悉和舒适的面向过程的编程方式,很符合我们一般的逻辑思维习惯,可很不幸,这会导致糟糕的性能。显然,SQL的总体目的是你要实现什么,而不是怎样实现。大道至简,我们在工作与学习的过程中经常会发现,更好的解决方案往往是简单的,是高效的,是优雅的。
本人曾经用T-SQL重写了一个基于游标的存储过程,那个表只有100,000条记录,原来的存储过程用了40分钟才执行完毕,而新的存储过程只用了不到1秒。在这里,我想将自己遇到和收集到的关于集合运算与游标操作的对比展现给大家,以供参考。
我们有时会遇到这样一个问题,类似于某一列的值累计求和(即本条记录的某个值=前几列该值的合计)。我将解决的核心部分抽取出来。
--- 原始数据如下:
OID | Period | Amount | Balance |
1 | 2009 | 3500.00 | 0.00 |
2 | 2009 | 5100.00 | 0.00 |
3 | 2009 | 10000.00 | 0.00 |
4 | 2010 | 2560.00 | 0.00 |
5 | 2010 | 4700.00 | 0.00 |
-- 预期结果如下(求Balance的值):
OID | Period | Amount | Balance |
1 | 2009 | 3500.00 | 3500.00 |
2 | 2009 | 5100.00 | 8600.00 |
3 | 2009 | 10000.00 | 18600.00 |
4 | 2010 | 2560.00 | 2560.00 |
5 | 2010 | 4700.00 | 7260.00 |
创建测试数据的SQL脚本
CREATE TABLE tPeriod ( OID INT IDENTITY PRIMARY KEY , Period NVARCHAR(20) , Amount DECIMAL(18, 2) DEFAULT 0 , Balance DECIMAL(18, 2) DEFAULT 0 , Balance2 DECIMAL(18, 2) DEFAULT 0 , Balance3 DECIMAL(18, 2) DEFAULT 0 ) GO DECLARE @i INT SET @i = 1900 WHILE @i <= 2013 BEGIN INSERT INTO tPeriod(Period, Amount) SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) SET @i = @i + 1 END INSERT INTO tPeriod(Period, Amount) SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) UNION ALL SELECT CAST(@i AS NVARCHAR), ROUND(RAND() * 10000, -2) GO SELECT * FROM tPeriod; GO
DECLARE @OID INT , @vPeriod_Pre NVARCHAR(20) , @vPeriod_Current NVARCHAR(20) , @dcAmount DECIMAL(18, 2) , @dcBalance DECIMAL(18, 2) DECLARE cursor1 CURSOR FOR SELECT t.OID, t.Period, t.Amount from tPeriod AS t OPEN cursor1 FETCH NEXT FROM cursor1 INTO @OID, @vPeriod_Current, @dcAmount SELECT @vPeriod_Pre = @vPeriod_Current, @dcBalance = 0 WHILE @@FETCH_STATUS = 0 BEGIN IF @vPeriod_Current = @vPeriod_Pre BEGIN SET @dcBalance = @dcBalance + @dcAmount END ELSE BEGIN SELECT @vPeriod_Pre = @vPeriod_Current, @dcBalance = @dcAmount END UPDATE tPeriod SET Balance = @dcBalance WHERE OID = @OID FETCH NEXT FROM cursor1 INTO @OID, @vPeriod_Current, @dcAmount END CLOSE cursor1 DEALLOCATE cursor1
-- 参考答案2 UPDATE tPeriod SET Balance3 = ( SELECT SUM(Amount) FROM tPeriod AS t WHERE t.Period = tPeriod.Period AND t.OID <= tPeriod.OID ) GO -- 参考答案3(SQLSERVER) DECLARE @dcAmt DECIMAL(18, 2), @period CHAR(4) UPDATE T1 SET @dcAmt = CASE WHEN Period = @period THEN @dcAmt + Amount ELSE Amount END, @Period = Period, Balance2 = @dcAmt FROM tPeriod AS T1 GO
-- 参考答案3(Oracle) SELECT t.*, sum(t.amount) over(partition BY t.Period order by t.OID) as acc FROM tPeriod t;