如何从字符串中删除所有非字母的字符?
非字母数字呢?
这必须是一个自定义函数还是也有更通用的解决方案?
如何从字符串中删除所有非字母的字符?
非字母数字呢?
这必须是一个自定义函数还是也有更通用的解决方案?
当前回答
Here's a solution that doesn't require creating a function or listing all instances of characters to replace. It uses a recursive WITH statement in combination with a PATINDEX to find unwanted chars. It will replace all unwanted chars in a column - up to 100 unique bad characters contained in any given string. (E.G. "ABC123DEF234" would contain 4 bad characters 1, 2, 3 and 4) The 100 limit is the maximum number of recursions allowed in a WITH statement, but this doesn't impose a limit on the number of rows to process, which is only limited by the memory available. If you don't want DISTINCT results, you can remove the two options from the code.
-- Create some test data:
SELECT * INTO #testData
FROM (VALUES ('ABC DEF,K.l(p)'),('123H,J,234'),('ABCD EFG')) as t(TXT)
-- Actual query:
-- Remove non-alpha chars: '%[^A-Z]%'
-- Remove non-alphanumeric chars: '%[^A-Z0-9]%'
DECLARE @BadCharacterPattern VARCHAR(250) = '%[^A-Z]%';
WITH recurMain as (
SELECT DISTINCT CAST(TXT AS VARCHAR(250)) AS TXT, PATINDEX(@BadCharacterPattern, TXT) AS BadCharIndex
FROM #testData
UNION ALL
SELECT CAST(TXT AS VARCHAR(250)) AS TXT, PATINDEX(@BadCharacterPattern, TXT) AS BadCharIndex
FROM (
SELECT
CASE WHEN BadCharIndex > 0
THEN REPLACE(TXT, SUBSTRING(TXT, BadCharIndex, 1), '')
ELSE TXT
END AS TXT
FROM recurMain
WHERE BadCharIndex > 0
) badCharFinder
)
SELECT DISTINCT TXT
FROM recurMain
WHERE BadCharIndex = 0;
其他回答
使用CTE生成的数字表来检查每个字符,然后FOR XML连接到一个保留值的字符串,您可以…
CREATE FUNCTION [dbo].[PatRemove](
@pattern varchar(50),
@expression varchar(8000)
)
RETURNS varchar(8000)
AS
BEGIN
WITH
d(d) AS (SELECT d FROM (VALUES (0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) digits(d)),
nums(n) AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM d d1, d d2, d d3, d d4),
chars(c) AS (SELECT SUBSTRING(@expression, n, 1) FROM nums WHERE n <= LEN(@expression))
SELECT
@expression = (SELECT c AS [text()] FROM chars WHERE c NOT LIKE @pattern FOR XML PATH(''));
RETURN @expression;
END
Here's a solution that doesn't require creating a function or listing all instances of characters to replace. It uses a recursive WITH statement in combination with a PATINDEX to find unwanted chars. It will replace all unwanted chars in a column - up to 100 unique bad characters contained in any given string. (E.G. "ABC123DEF234" would contain 4 bad characters 1, 2, 3 and 4) The 100 limit is the maximum number of recursions allowed in a WITH statement, but this doesn't impose a limit on the number of rows to process, which is only limited by the memory available. If you don't want DISTINCT results, you can remove the two options from the code.
-- Create some test data:
SELECT * INTO #testData
FROM (VALUES ('ABC DEF,K.l(p)'),('123H,J,234'),('ABCD EFG')) as t(TXT)
-- Actual query:
-- Remove non-alpha chars: '%[^A-Z]%'
-- Remove non-alphanumeric chars: '%[^A-Z0-9]%'
DECLARE @BadCharacterPattern VARCHAR(250) = '%[^A-Z]%';
WITH recurMain as (
SELECT DISTINCT CAST(TXT AS VARCHAR(250)) AS TXT, PATINDEX(@BadCharacterPattern, TXT) AS BadCharIndex
FROM #testData
UNION ALL
SELECT CAST(TXT AS VARCHAR(250)) AS TXT, PATINDEX(@BadCharacterPattern, TXT) AS BadCharIndex
FROM (
SELECT
CASE WHEN BadCharIndex > 0
THEN REPLACE(TXT, SUBSTRING(TXT, BadCharIndex, 1), '')
ELSE TXT
END AS TXT
FROM recurMain
WHERE BadCharIndex > 0
) badCharFinder
)
SELECT DISTINCT TXT
FROM recurMain
WHERE BadCharIndex = 0;
从性能角度来看,我会使用内联函数:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[udf_RemoveNumericCharsFromString]
(
@List NVARCHAR(4000)
)
RETURNS TABLE
AS RETURN
WITH GetNums AS (
SELECT TOP(ISNULL(DATALENGTH(@List), 0))
n = ROW_NUMBER() OVER(ORDER BY (SELECT NULL))
FROM
(VALUES (0),(0),(0),(0)) d (n),
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) e (n),
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) f (n),
(VALUES (0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) g (n)
)
SELECT StrOut = ''+
(SELECT Chr
FROM GetNums
CROSS APPLY (SELECT SUBSTRING(@List , n,1)) X(Chr)
WHERE Chr LIKE '%[^0-9]%'
ORDER BY N
FOR XML PATH (''),TYPE).value('.','NVARCHAR(MAX)')
/*How to Use
SELECT StrOut FROM dbo.udf_RemoveNumericCharsFromString ('vv45--9gut')
Result: vv--gut
*/
CREATE FUNCTION remove_spc_char(@str VARCHAR(MAX))
RETURNS VARCHAR(MAX)
AS
BEGIN
DECLARE @resp VARCHAR(MAX) = '';
DECLARE @str_val VARCHAR(MAX) = UPPER(@str);
DECLARE @i INTEGER= 1;
DECLARE @v_asc INTEGER;
WHILE @i <= (LEN(@str_val))
BEGIN
SET @v_asc = (ASCII(SUBSTRING(@str_val, @i, 1)))
BEGIN
IF @v_asc in (192,193,194,195,196,65)
begin
SET @v_asc = 65;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (200,201,202,203,233,69)
begin
SET @v_asc = 69;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (204,205,206,207,296,73)
begin
SET @v_asc = 73;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (210,211,212,213,214,79)
begin
SET @v_asc = 79;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (217,218,219,220,85)
begin
SET @v_asc = 85;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (199,231,67)
begin
SET @v_asc = 67;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (209,78)
begin
SET @v_asc = 78;
SET @resp = concat(@resp, CHAR(@v_asc));
end;
IF @v_asc in (924,181,358,216,222,330,272,208,198,42,37,38,34,36,35,
64,33,39,41,40,43,61,95,45,62,60,63,47,176,183,124,166,174,359,248,254,
180,170,186,126,312,331,273,172,178,179,163,162,123,91,93,125,92,167,240,
223,230,171,187,169,185,168)
begin
SET @resp = concat(@resp, '');
end;
ELSE
begin
if @v_asc not in (65,67,69,73,78,79,85)
begin
SET @resp = concat(@resp, CHAR(@v_asc));
end;
end;
END;
SET @i = @i + 1
END;
RETURN @resp;
END;
SQL Server >= 2017…
declare @text varchar(max)
-- create some sample text
select
@text=
'
Lorem @ipsum *&dolor-= sit?! amet, {consectetur } adipiscing\ elit. Vivamus commodo justo metus, sed facilisis ante
congue eget. Proin ac bibendum sem/.
'
-- the characters to be removed
declare @unwanted varchar(max)='''.,!?/<>"[]{}|`~@#$%^&*()-+=/\:;'+char(13)+char(10)
-- interim replaced with
declare @replace_with char(1)=' '
-- call the translate function that will change unwanted characters to spaces
-- in this sample
declare @translated varchar(max)
select @translated=TRANSLATE(@text,@unwanted,REPLICATE(@replace_with,len(@unwanted)))
-- In this case, I want to preserve one space
select string_agg(trim(value),' ')
from STRING_SPLIT(@translated,' ')
where trim(value)<>''
-- Result
'Lorem ipsum dolor sit amet consectetur adipiscing elit Vivamus commodo justo metus sed facilisis ante congue eget Proin ac bibendum sem'