我有一个这样的SQL表:

| SomeID         | OtherID     | Data
+----------------+-------------+-------------------
| abcdef-.....   | cdef123-... | 18,20,22
| abcdef-.....   | 4554a24-... | 17,19
| 987654-.....   | 12324a2-... | 13,19,20

是否有一种查询方式,例如执行 SELECT OtherID, SplitData WHERE SomeID = 'abcdef-.......',就能返回单独的行,就像这样:

| OtherID     | SplitData
+-------------+-------------------
| cdef123-... | 18
| cdef123-... | 20
| cdef123-... | 22
| 4554a24-... | 17
| 4554a24-... | 19

基本上把我的数据在逗号处分割成单独的行?

我知道将逗号分隔的字符串存储到关系数据库中听起来很愚蠢,但消费者应用程序中的正常用例使这种方法非常有用。

我不想在应用程序中进行拆分,因为我需要分页,所以我想在重构整个应用程序之前探索选项。

它是SQL Server 2008(非r2)。


当前回答

通过创建这个分割字符串的函数([DelimitedSplit]),可以对SELECT执行OUTER APPLY。

CREATE FUNCTION [dbo].[DelimitedSplit]
-- Splits @pString on the single-character @pDelimiter and returns one row per
-- element: ItemNumber (1-based position of the element) and Item (its text).
        (@pString VARCHAR(8000), @pDelimiter CHAR(1))
-- WARNING: keep the parameters non-MAX; the original author notes that MAX
-- data types kill the performance of this function.
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
    -- Inline row generator: 10 x 10 = 100 rows, 100 x 100 = 10,000 rows,
    -- which is enough to number every character of a VARCHAR(8000).
    WITH Ten(N) AS (
             SELECT d.N
               FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS d(N)
         ),
         Hundred(N) AS (
             SELECT 1 FROM Ten AS x CROSS JOIN Ten AS y
         ),
         TenThousand(N) AS (
             SELECT 1 FROM Hundred AS x CROSS JOIN Hundred AS y
         ),
         -- One row per character position of @pString (no rows when it is NULL or empty).
         CharPosition(N) AS (
             SELECT TOP (ISNULL(DATALENGTH(@pString),0))
                    ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
               FROM TenThousand
         ),
         -- Start position of every element: position 1 plus the position right after each delimiter.
         ElementStart(N1) AS (
             SELECT 1
             UNION ALL
             SELECT cp.N + 1
               FROM CharPosition AS cp
              WHERE SUBSTRING(@pString, cp.N, 1) = @pDelimiter
         ),
         -- Start and length of every element. When no further delimiter exists,
         -- CHARINDEX returns 0, NULLIF turns that into NULL and ISNULL falls back to 8000
         -- so that SUBSTRING reads to the end of the string.
         ElementSpan(N1, L1) AS (
             SELECT es.N1,
                    ISNULL(NULLIF(CHARINDEX(@pDelimiter, @pString, es.N1), 0) - es.N1, 8000)
               FROM ElementStart AS es
         )
    SELECT ItemNumber = ROW_NUMBER() OVER (ORDER BY sp.N1),
           Item       = SUBSTRING(@pString, sp.N1, sp.L1)
      FROM ElementSpan AS sp
;

TEST

-- Scratch table holding sample comma-separated strings, including an empty one.
CREATE TABLE #Testdata
(
    SomeID  INT,
    OtherID INT,
    String  VARCHAR(MAX)
);

INSERT INTO #Testdata (SomeID, OtherID, String) VALUES (1,  9, '18,20,22');
INSERT INTO #Testdata (SomeID, OtherID, String) VALUES (2,  8, '17,19');
INSERT INTO #Testdata (SomeID, OtherID, String) VALUES (3,  7, '13,19,20');
INSERT INTO #Testdata (SomeID, OtherID, String) VALUES (4,  6, '');
INSERT INTO #Testdata (SomeID, OtherID, String) VALUES (9, 11, '1,2,3,4');

-- OUTER APPLY keeps every source row and appends one output row per split element.
SELECT
    td.SomeID,
    td.OtherID,
    td.String,
    ds.ItemNumber,
    ds.Item
FROM #Testdata AS td
OUTER APPLY [dbo].[DelimitedSplit](td.String, ',') AS ds;

DROP TABLE #Testdata;

结果

SomeID  OtherID String      ItemNumber  Item
1       9       18,20,22    1           18
1       9       18,20,22    2           20
1       9       18,20,22    3           22
2       8       17,19       1           17
2       8       17,19       2           19
3       7       13,19,20    1           13
3       7       13,19,20    2           19
3       7       13,19,20    3           20
4       6                   1
9       11      1,2,3,4     1           1
9       11      1,2,3,4     2           2
9       11      1,2,3,4     3           3
9       11      1,2,3,4     4           4

其他回答

终于,SQL Server 2016结束了等待。他们引入了Split字符串函数STRING_SPLIT:

-- Expand each comma-separated Data value into one row per element (requires STRING_SPLIT).
SELECT
    t.OtherID,
    cs.Value -- SplitData
FROM yourtable AS t
CROSS APPLY STRING_SPLIT(t.Data, ',') AS cs

所有其他分割字符串的方法(如 XML、Tally 表、WHILE 循环等)都被 STRING_SPLIT 函数远远甩在了后面。

这里有一篇关于性能比较的优秀文章:性能惊喜和假设:STRING_SPLIT。

对于旧版本,这里使用的是一个分割字符串函数(可能的最佳方法)

CREATE FUNCTION [dbo].[DelimitedSplit8K]
-- Splits @pString on the single-character @pDelimiter and returns one row per
-- element: ItemNumber (1-based position of the element) and Item (its text).
        (@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
 RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
     -- enough to cover VARCHAR(8000)
  WITH E1(N) AS (
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
                 SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
                ),                                        --10E+1 or 10 rows
       E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b),     --10E+2 or 100 rows
       E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b),     --10E+4 or 10,000 rows max
 cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
                     -- for both a performance gain and prevention of accidental "overruns"
                 SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
                ),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
                 SELECT 1 UNION ALL
                 SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
                ),
cteLen(N1,L1) AS(--==== Return start and length (for use in substring)
                 SELECT s.N1,
                        ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
                   FROM cteStart s
                )
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
 SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
        Item       = SUBSTRING(@pString, l.N1, l.L1)
   FROM cteLen l
;

引自文章《Tally OH! An Improved SQL 8K “CSV Splitter” Function》(一个改进的 SQL 8K“CSV 拆分器”函数)。

2016年2月更新:请参见 Tally 表示例——它的性能很可能优于我下面这个写于2014年2月的 TVF。以下保留原始帖子以供参考:


上面的例子中重复的代码太多了。我不喜欢 CTE 和 XML 的性能。此外,这里还提供了一个显式的 Id 列,以便对顺序有要求的使用者可以指定 ORDER BY 子句。

-- Splits @Line on the delimiter @SplitOn (up to 5 characters, default ',') and
-- returns one row per element. Id is an identity value that reflects the order
-- of the elements, so callers can ORDER BY Id. A NULL @Line yields no rows.
CREATE FUNCTION dbo.Split
(
    @Line nvarchar(MAX),
    @SplitOn nvarchar(5) = ','
)
RETURNS @RtnValue table
(
    Id INT NOT NULL IDENTITY(1,1) PRIMARY KEY CLUSTERED,
    Data nvarchar(100) NOT NULL
)
AS
BEGIN
    IF @Line IS NULL RETURN;

    -- LEN() ignores trailing spaces, so LEN(N' ') is 0 and LEN(N', ') is 1.
    -- Appending a sentinel character and subtracting 1 gives the true length;
    -- without this a space delimiter never advances @start_at (infinite loop).
    DECLARE @split_on_len INT = LEN(@SplitOn + N'x') - 1;
    DECLARE @line_len INT = LEN(@Line + N'x') - 1;
    DECLARE @start_at INT = 1;
    DECLARE @end_at INT;
    DECLARE @data_len INT;

    WHILE 1=1
    BEGIN
        -- Position of the next delimiter, or 0 when none is left.
        SET @end_at = CHARINDEX(@SplitOn,@Line,@start_at);
        -- The final element runs to the real end of the line (trailing spaces included).
        SET @data_len = CASE @end_at WHEN 0 THEN @line_len - @start_at + 1 ELSE @end_at-@start_at END;
        INSERT INTO @RtnValue (data) VALUES( SUBSTRING(@Line,@start_at,@data_len) );
        IF @end_at = 0 BREAK;
        -- Skip over the whole delimiter, not just its non-space part.
        SET @start_at = @end_at + @split_on_len;
    END;

    RETURN;
END;

当使用这种方法时,您必须确保您的值中没有包含非法XML - user1151923

我总是使用 XML 方法,但要确保使用的是有效的 XML。我有两个函数,用于在有效的 XML 和普通文本之间转换。(我倾向于去掉回车换行符,因为我通常不需要它们。)

-- Returns @Text with line feeds and carriage returns removed and the five XML
-- special characters (& < > ' ") replaced by their predefined entities.
CREATE FUNCTION dbo.udf_ConvertTextToXML (@Text varchar(MAX)) 
    RETURNS varchar(MAX)
AS
    BEGIN
        SET @Text = REPLACE(@Text,CHAR(10),'');
        SET @Text = REPLACE(@Text,CHAR(13),'');
        -- '&' must be escaped first; otherwise the '&' of entities produced by
        -- the replacements below would itself be escaped (e.g. '<' -> '&amp;lt;').
        SET @Text = REPLACE(@Text,'&','&amp;');
        SET @Text = REPLACE(@Text,'<','&lt;');
        SET @Text = REPLACE(@Text,'>','&gt;');
        SET @Text = REPLACE(@Text,'''','&apos;');
        SET @Text = REPLACE(@Text,'"','&quot;');
    RETURN @Text;
END;


-- Returns @Text with the five predefined XML entities (&lt; &gt; &apos; &quot; &amp;)
-- replaced by the characters they stand for.
CREATE FUNCTION dbo.udf_ConvertTextFromXML (@Text VARCHAR(MAX)) 
    RETURNS VARCHAR(max)
AS
    BEGIN
        SET @Text = REPLACE(@Text,'&lt;','<');
        SET @Text = REPLACE(@Text,'&gt;','>');
        SET @Text = REPLACE(@Text,'&apos;','''');
        SET @Text = REPLACE(@Text,'&quot;','"');
        -- '&amp;' must be decoded last; decoding it earlier would turn an escaped
        -- literal such as '&amp;gt;' into '&gt;' and then wrongly into '>'.
        SET @Text = REPLACE(@Text,'&amp;','&');
    RETURN @Text;
END;
-- Splits the comma-separated @id_list into one row per id in @table.
DECLARE @id_list VARCHAR(MAX) = '1234,23,56,576,1231,567,122,87876,57553,1216';
DECLARE @table TABLE ( id VARCHAR(50) );

-- Number of ids still to extract: one more than the number of commas.
DECLARE @remaining INT = LEN(@id_list) - LEN(REPLACE(@id_list, ',', '')) + 1;
DECLARE @prev_comma INT = 0;   -- position of the comma before the current id (0 = none)
DECLARE @comma_at INT;         -- position of the comma after the current id (0 = none)
DECLARE @element_end INT;      -- position just past the current id

WHILE @remaining > 0
BEGIN
    SET @comma_at = CHARINDEX(',', @id_list, @prev_comma + 1);

    -- The last id has no comma after it, so it ends just past the end of the list.
    IF @comma_at = 0
        SET @element_end = LEN(@id_list) + 1;
    ELSE
        SET @element_end = @comma_at;

    INSERT INTO @table (id)
    VALUES (SUBSTRING(@id_list, @prev_comma + 1, @element_end - @prev_comma - 1));

    SET @prev_comma = @comma_at;
    SET @remaining = @remaining - 1;
END;

SELECT id
FROM @table;

函数

-- Splits @column on @separator and returns one row per element, numbered by ID
-- in order of appearance. The input is trimmed of leading/trailing spaces first;
-- individual elements are not trimmed. A trailing separator does not produce a
-- trailing empty element.
CREATE FUNCTION dbo.SplitToRows (@column varchar(100), @separator varchar(10))
RETURNS @rtnTable TABLE
  (
  ID int identity(1,1),
  ColumnA varchar(max)
  )
 AS
BEGIN
    -- String positions are 1-based, so scanning starts at 1.
    DECLARE @position int = 1;
    DECLARE @endAt int = 0;
    -- LEN() ignores trailing spaces; the sentinel character gives the true separator length.
    DECLARE @separatorLen int = LEN(@separator + 'x') - 1;

    set @column = ltrim(rtrim(@column));

    -- Backward compatibility: an empty (or all-space) input yields a single empty row.
    if (@column = '')
    begin
        Insert into @rtnTable(ColumnA) select '';
        return;
    end;

    WHILE @position<=len(@column)
    BEGIN
        set @endAt = CHARINDEX(@separator,@column,@position);
        if(@endAt=0)
        begin
            -- Last element: take everything from @position through the final
            -- character (the +1 keeps the last character, which was previously dropped).
            Insert into @rtnTable(ColumnA) Select substring(@column,@position,len(@column)-@position+1);
            break;
        end;

        Insert into @rtnTable(ColumnA) select substring(@column,@position,@endAt-@position);
        -- Step over the whole separator, which may be longer than one character.
        set @position=@endAt+@separatorLen;
    END;
    return;
END;

用例

-- Example call: returns the elements of the ';'-separated string, one per row.
SELECT
    ID,
    ColumnA
FROM dbo.SplitToRows('T14; p226.0001; eee; 3554;', ';');

或者只是一个有多个结果集的选择

-- Splits @column on @separator and emits every element as its own result set.
DECLARE @column varchar(max)= '1234; 4748;abcde; 324432';
DECLARE @separator varchar(10) = ';';
-- String positions are 1-based, so scanning starts at 1.
DECLARE @position int = 1;
DECLARE @endAt int = 0;
-- LEN() ignores trailing spaces; the sentinel character gives the true separator length.
DECLARE @separatorLen int = LEN(@separator + 'x') - 1;

set @column = ltrim(rtrim(@column));

WHILE @position<=len(@column)
BEGIN
    set @endAt = CHARINDEX(@separator,@column,@position);
    if(@endAt=0)
    begin
        -- Last element: take everything from @position through the final
        -- character (the +1 keeps the last character, which was previously dropped).
        Select substring(@column,@position,len(@column)-@position+1);
        break;
    end;

    select substring(@column,@position,@endAt-@position);
    -- Step over the whole separator, which may be longer than one character.
    set @position=@endAt+@separatorLen;
END;