这是我的一点小小的贡献:一个在字符数组上就地替换、无需创建中间字符串或 StringBuilder 的方法。它速度快、易于理解,是本帖中提到的其他方法之外的一个不错的替代方案。
// Backing store for InvalidCharsHash; stays null until the property is first read.
private static HashSet<char> _invalidCharsHash;

/// <summary>
/// Set built from <c>Path.GetInvalidFileNameChars()</c>. It is created on the first
/// read and the same instance is returned on every later read.
/// </summary>
private static HashSet<char> InvalidCharsHash
{
    get
    {
        if (_invalidCharsHash == null)
        {
            _invalidCharsHash = new HashSet<char>(Path.GetInvalidFileNameChars());
        }

        return _invalidCharsHash;
    }
}
/// <summary>
/// Returns a copy of <paramref name="fileName"/> in which every character contained in
/// <c>InvalidCharsHash</c> is replaced by the first character of <paramref name="newValue"/>.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">
/// Replacement text; only its first character is used. Defaults to <c>"_"</c> so the
/// method can be called with the file name alone.
/// </param>
/// <returns>A new string of the same length as <paramref name="fileName"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="newValue"/> is null or empty.</exception>
private static string ReplaceInvalidChars(string fileName, string newValue = "_")
{
    if (fileName == null)
    {
        throw new ArgumentNullException(nameof(fileName));
    }

    // This method never removes characters, so it needs an actual replacement character.
    if (string.IsNullOrEmpty(newValue))
    {
        throw new ArgumentException("A non-empty replacement value is required.", nameof(newValue));
    }

    char newChar = newValue[0];
    HashSet<char> invalidChars = InvalidCharsHash;

    // Work on a char[] copy so replacements are done in place, then build one string.
    char[] chars = fileName.ToCharArray();
    for (int i = 0; i < chars.Length; i++)
    {
        if (invalidChars.Contains(chars[i]))
        {
            chars[i] = newChar;
        }
    }

    return new string(chars);
}
你可以这样调用它:
// Sample input: ordinary text interleaved with characters that are not allowed in file names.
string illegal = "\"M <>\"\\a/ry/ h**ad:>> a\\/:*?\"<>| li*tt|le|| la\"mb.?";
// Replace every invalid character with an underscore.
string legal = ReplaceInvalidChars(illegal, "_");
并返回:
_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
值得注意的是,该方法总是将无效字符替换为给定值,但不会删除它们。如果你想删除无效字符,这个替代方法可以做到:
/// <summary>
/// Returns a copy of <paramref name="fileName"/> without the characters contained in
/// <c>InvalidCharsHash</c>, or with each of them replaced by the first character of
/// <paramref name="newValue"/> when one is supplied.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">
/// Replacement text; only its first character is used. When it is null or empty, or
/// its first character is <c>'\0'</c>, invalid characters are dropped instead.
/// </param>
/// <returns>The sanitized string.</returns>
private static string RemoveInvalidChars(string fileName, string newValue)
{
    char replacement = char.MinValue;
    if (!string.IsNullOrEmpty(newValue))
    {
        replacement = newValue[0];
    }

    // char.MinValue doubles as the "no replacement, just drop it" marker.
    bool substitute = replacement != char.MinValue;

    char[] source = fileName.ToCharArray();
    char[] output = new char[source.Length];
    int written = 0;

    foreach (char current in source)
    {
        if (!InvalidCharsHash.Contains(current))
        {
            output[written++] = current;
            continue;
        }

        if (substitute)
        {
            output[written++] = replacement;
        }
    }

    // Only the first 'written' slots hold data when characters were dropped.
    return new string(output, 0, written);
}
基准测试
如果你追求的是性能:我对本帖中出现的大多数方法做了计时测试。其中一些方法并不会替换为给定的字符,因为提问者(OP)要求的只是清理字符串。我添加了用给定字符替换的测试;如果你的场景只需要删除不需要的字符,我还添加了一些用空字符串替换的测试。用于此基准测试的代码位于末尾,你可以自行运行测试。
注意:方法 Test1 和 Test2 就是本回答上文提出的两个方法(分别对应 ReplaceInvalidChars 和 RemoveInvalidChars)。
第一次运行
替换为'_',1000000次迭代
结果:
============Test1===============
Elapsed=00:00:01.6665595
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test2===============
Elapsed=00:00:01.7526835
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test3===============
Elapsed=00:00:05.2306227
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test4===============
Elapsed=00:00:14.8203696
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test5===============
Elapsed=00:00:01.8273760
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test6===============
Elapsed=00:00:05.4249985
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test7===============
Elapsed=00:00:07.5653833
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test8===============
Elapsed=00:12:23.1410106
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test9===============
Elapsed=00:00:02.1016708
Result=_M ____a_ry_ h__ad___ a_________ li_tt_le__ la_mb._
============Test10===============
Elapsed=00:00:05.0987225
Result=M ary had a little lamb.
============Test11===============
Elapsed=00:00:06.8004289
Result=M ary had a little lamb.
第二次运行
去除无效字符,100万次迭代
注意:Test1 不会移除无效字符,只会替换;当替换值为空字符串时,它会把无效字符替换为 '\0'(char.MinValue)。
结果:
============Test1===============
Elapsed=00:00:01.6945352
Result= M a ry h ad a li tt le la mb.
============Test2===============
Elapsed=00:00:01.4798049
Result=M ary had a little lamb.
============Test3===============
Elapsed=00:00:04.0415688
Result=M ary had a little lamb.
============Test4===============
Elapsed=00:00:14.3397960
Result=M ary had a little lamb.
============Test5===============
Elapsed=00:00:01.6782505
Result=M ary had a little lamb.
============Test6===============
Elapsed=00:00:04.9251707
Result=M ary had a little lamb.
============Test7===============
Elapsed=00:00:07.9562379
Result=M ary had a little lamb.
============Test8===============
Elapsed=00:12:16.2918943
Result=M ary had a little lamb.
============Test9===============
Elapsed=00:00:02.0770277
Result=M ary had a little lamb.
============Test10===============
Elapsed=00:00:05.2721232
Result=M ary had a little lamb.
============Test11===============
Elapsed=00:00:05.2802903
Result=M ary had a little lamb.
基准测试结果
方法Test1、Test2和Test5是最快的。方法Test8是最慢的。
代码
以下是基准测试的完整代码:
// Cache for InvalidCharsHash; null until the property is read for the first time.
private static HashSet<char> _invalidCharsHash;

/// <summary>
/// The characters from <c>Path.GetInvalidFileNameChars()</c> as a hash set, built on
/// first access and reused afterwards.
/// </summary>
private static HashSet<char> InvalidCharsHash
{
    get
    {
        HashSet<char> cached = _invalidCharsHash;
        if (cached != null)
        {
            return cached;
        }

        return _invalidCharsHash = new HashSet<char>(Path.GetInvalidFileNameChars());
    }
}
// Cache for InvalidCharsValue; null until the property is read for the first time.
private static string _invalidCharsValue;

/// <summary>
/// The characters from <c>Path.GetInvalidFileNameChars()</c> concatenated into one
/// string, built on first access and reused afterwards.
/// </summary>
private static string InvalidCharsValue
{
    get
    {
        if (_invalidCharsValue == null)
        {
            char[] invalid = Path.GetInvalidFileNameChars();
            _invalidCharsValue = new string(invalid);
        }

        return _invalidCharsValue;
    }
}
// Cache for InvalidChars; null until the property is read for the first time.
private static char[] _invalidChars;

/// <summary>
/// The array returned by <c>Path.GetInvalidFileNameChars()</c>, fetched on first
/// access and reused afterwards.
/// </summary>
private static char[] InvalidChars
{
    get
    {
        if (_invalidChars == null)
        {
            _invalidChars = Path.GetInvalidFileNameChars();
        }

        return _invalidChars;
    }
}
/// <summary>
/// Runs every candidate (Test1 through Test11, in that order) through
/// <c>TimeBenchmark</c> with the same input, iteration count and replacement value,
/// then waits for console input before returning.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const int iterations = 1000000;
    string input = "\"M <>\"\\a/ry/ h**ad:>> a\\/:*?\"<>| li*tt|le|| la\"mb.?";
    // An empty replacement makes the candidates that support it remove characters.
    string replacement = "";

    Func<string, string, string>[] candidates =
    {
        Test1, Test2, Test3, Test4, Test5, Test6,
        Test7, Test8, Test9, Test10, Test11
    };

    foreach (Func<string, string, string> candidate in candidates)
    {
        TimeBenchmark(iterations, candidate, input, replacement);
    }

    // Wait for console input so the output stays on screen.
    Console.Read();
}
/// <summary>
/// Calls <paramref name="func"/> <paramref name="maxLoop"/> times with the same
/// arguments and writes the method name, the total elapsed time and the last
/// returned value to the console.
/// </summary>
/// <param name="maxLoop">Number of invocations to time.</param>
/// <param name="func">The method under test; receives the input and the replacement value.</param>
/// <param name="testString">Input passed to every invocation.</param>
/// <param name="newValue">Replacement value passed to every invocation.</param>
/// <exception cref="ArgumentNullException"><paramref name="func"/> is null.</exception>
private static void TimeBenchmark(int maxLoop, Func<string, string, string> func, string testString, string newValue)
{
    // Fail before timing anything; the report below needs func.Method.
    if (func == null)
    {
        throw new ArgumentNullException(nameof(func));
    }

    string result = string.Empty;
    Stopwatch sw = Stopwatch.StartNew();
    for (int i = 0; i < maxLoop; i++)
    {
        result = func(testString, newValue);
    }
    sw.Stop();

    Console.WriteLine($"============{func.Method.Name}===============");
    Console.WriteLine("Elapsed={0}", sw.Elapsed);
    Console.WriteLine("Result={0}", result);
    Console.WriteLine("");
}
/// <summary>
/// Benchmark candidate: copies <paramref name="fileName"/> into a char array and
/// overwrites each character found in <c>InvalidCharsHash</c> with the first character
/// of <paramref name="newValue"/>. Nothing is removed; when <paramref name="newValue"/>
/// is null or empty the replacement is <c>char.MinValue</c>.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">Replacement text; only its first character is used.</param>
/// <returns>A string of the same length as <paramref name="fileName"/>.</returns>
private static string Test1(string fileName, string newValue)
{
    char replacement;
    if (string.IsNullOrEmpty(newValue))
    {
        replacement = char.MinValue;
    }
    else
    {
        replacement = newValue[0];
    }

    char[] buffer = fileName.ToCharArray();
    int index = 0;
    while (index < buffer.Length)
    {
        if (InvalidCharsHash.Contains(buffer[index]))
        {
            buffer[index] = replacement;
        }

        index++;
    }

    return new string(buffer);
}
/// <summary>
/// Benchmark candidate: copies the characters of <paramref name="fileName"/> into a
/// second buffer, skipping those found in <c>InvalidCharsHash</c>, or writing the first
/// character of <paramref name="newValue"/> in their place when one is supplied.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">
/// Replacement text; only its first character is used. When it is null or empty, or
/// its first character is <c>'\0'</c>, invalid characters are dropped.
/// </param>
/// <returns>The sanitized string.</returns>
private static string Test2(string fileName, string newValue)
{
    char replacement = string.IsNullOrEmpty(newValue) ? char.MinValue : newValue[0];
    // char.MinValue means "drop the character" rather than "write a replacement".
    bool dropInvalid = replacement == char.MinValue;

    char[] input = fileName.ToCharArray();
    char[] output = new char[input.Length];
    int count = 0;

    foreach (char current in input)
    {
        bool invalid = InvalidCharsHash.Contains(current);
        if (!invalid)
        {
            output[count++] = current;
        }
        else if (!dropInvalid)
        {
            output[count++] = replacement;
        }
    }

    return new string(output, 0, count);
}
/// <summary>
/// Benchmark candidate: calls <c>string.Replace</c> once for every character in
/// <c>InvalidCharsValue</c>, substituting <paramref name="newValue"/> each time.
/// </summary>
/// <param name="filename">The text to sanitize.</param>
/// <param name="newValue">Text that replaces each invalid character.</param>
/// <returns>The result after all replacements have been applied.</returns>
private static string Test3(string filename, string newValue)
{
    string invalid = InvalidCharsValue;
    string result = filename;
    for (int i = 0; i < invalid.Length; i++)
    {
        result = result.Replace(invalid[i].ToString(), newValue);
    }

    return result;
}
/// <summary>
/// Benchmark candidate: builds a character-class pattern from the escaped
/// <c>InvalidCharsValue</c>, constructs a new <c>Regex</c> from it on every call and
/// replaces each match with <paramref name="newValue"/>.
/// </summary>
/// <param name="filename">The text to sanitize.</param>
/// <param name="newValue">Replacement passed to <c>Regex.Replace</c>.</param>
/// <returns>The sanitized string.</returns>
private static string Test4(string filename, string newValue)
{
    string pattern = string.Format("[{0}]", Regex.Escape(InvalidCharsValue));
    return new Regex(pattern).Replace(filename, newValue);
}
/// <summary>
/// Benchmark candidate: splits <paramref name="filename"/> at every character in
/// <c>InvalidChars</c> and joins the pieces back together with
/// <paramref name="newValue"/> as the separator.
/// </summary>
/// <param name="filename">The text to sanitize.</param>
/// <param name="newValue">Separator inserted where invalid characters were.</param>
/// <returns>The sanitized string.</returns>
private static string Test5(string filename, string newValue)
{
    string[] segments = filename.Split(InvalidChars);
    return string.Join(newValue, segments);
}
/// <summary>
/// Benchmark candidate: folds over <c>InvalidChars</c> with LINQ <c>Aggregate</c>,
/// applying one <c>string.Replace</c> per invalid character.
/// </summary>
/// <param name="fileName">The text to sanitize; used as the fold's seed.</param>
/// <param name="newValue">Text that replaces each invalid character.</param>
/// <returns>The result after all replacements have been applied.</returns>
private static string Test6(string fileName, string newValue)
{
    return InvalidChars.Aggregate(
        fileName,
        (sanitized, invalid) => sanitized.Replace(invalid.ToString(), newValue));
}
/// <summary>
/// Benchmark candidate: uses the static <c>Regex.Replace</c> overload with
/// <c>RegexOptions.Compiled</c> and a character-class pattern built from the escaped
/// <c>InvalidCharsValue</c>.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">Replacement passed to <c>Regex.Replace</c>.</param>
/// <returns>The sanitized string.</returns>
private static string Test7(string fileName, string newValue)
{
    return Regex.Replace(
        fileName,
        string.Format("[{0}]", Regex.Escape(InvalidCharsValue)),
        newValue,
        RegexOptions.Compiled);
}
/// <summary>
/// Benchmark candidate: constructs a new <c>Regex</c> on every call with the
/// Singleline, Compiled and CultureInvariant options, using a character-class pattern
/// built from the escaped <c>InvalidCharsValue</c>, then replaces each match with
/// <paramref name="newValue"/>.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">Replacement passed to <c>Regex.Replace</c>.</param>
/// <returns>The sanitized string.</returns>
private static string Test8(string fileName, string newValue)
{
    const RegexOptions options =
        RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.CultureInvariant;

    string pattern = string.Format("[{0}]", Regex.Escape(InvalidCharsValue));
    return new Regex(pattern, options).Replace(fileName, newValue);
}
/// <summary>
/// Benchmark candidate: appends to a <c>StringBuilder</c> character by character,
/// appending the whole of <paramref name="newValue"/> in place of each character found
/// in <c>InvalidCharsHash</c>.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">Text appended in place of each invalid character.</param>
/// <returns>
/// <paramref name="newValue"/> when the builder ends up empty; the original
/// <paramref name="fileName"/> instance when nothing was replaced; otherwise the
/// built string.
/// </returns>
private static string Test9(string fileName, string newValue)
{
    var builder = new StringBuilder(fileName.Length);
    bool replacedAny = false;

    foreach (char current in fileName)
    {
        if (!InvalidCharsHash.Contains(current))
        {
            builder.Append(current);
            continue;
        }

        replacedAny = true;
        builder.Append(newValue);
    }

    // Empty result: empty input, or every character was invalid and newValue added nothing.
    if (builder.Length == 0)
    {
        return newValue;
    }

    if (!replacedAny)
    {
        return fileName;
    }

    return builder.ToString();
}
/// <summary>
/// Benchmark candidate: returns <paramref name="fileName"/> unchanged when none of its
/// characters is in <c>InvalidChars</c>; otherwise filters the invalid characters out
/// with LINQ. Invalid characters are always removed.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">Not used by this candidate.</param>
/// <returns>The input itself, or a new string without the invalid characters.</returns>
private static string Test10(string fileName, string newValue)
{
    bool alreadyClean = fileName.All(c => !InvalidChars.Contains(c));
    if (alreadyClean)
    {
        return fileName;
    }

    char[] kept = fileName.Where(c => !InvalidChars.Contains(c)).ToArray();
    return new string(kept);
}
/// <summary>
/// Benchmark candidate: filters out every character found in <c>InvalidChars</c> with
/// LINQ and builds a new string from what is left. Invalid characters are always
/// removed.
/// </summary>
/// <param name="fileName">The text to sanitize.</param>
/// <param name="newValue">Not used by this candidate.</param>
/// <returns>A new string without the invalid characters.</returns>
private static string Test11(string fileName, string newValue)
{
    char[] kept = fileName.Where(c => !InvalidChars.Contains(c)).ToArray();
    return new string(kept);
}