我需要一个强大的和简单的方法来删除非法的路径和文件字符从一个简单的字符串。我已经使用了下面的代码,但它似乎没有做任何事情,我错过了什么?

using System;
using System.IO;

namespace ConsoleApplication1
{
    class Program
    {
        static void Main(string[] args)
        {
            string illegal = "\"M<>\"\\a/ry/ h**ad:>> a\\/:*?\"<>| li*tt|le|| la\"mb.?";

            illegal = illegal.Trim(Path.GetInvalidFileNameChars());
            illegal = illegal.Trim(Path.GetInvalidPathChars());

            Console.WriteLine(illegal);
            Console.ReadLine();
        }
    }
}

当前回答

下面的代码片段应该对。net 3及更高版本有所帮助。

using System.IO;
using System.Text.RegularExpressions;

public static class PathValidation
{
    private static string pathValidatorExpression = "^[^" + string.Join("", Array.ConvertAll(Path.GetInvalidPathChars(), x => Regex.Escape(x.ToString()))) + "]+$";
    private static Regex pathValidator = new Regex(pathValidatorExpression, RegexOptions.Compiled);

    private static string fileNameValidatorExpression = "^[^" + string.Join("", Array.ConvertAll(Path.GetInvalidFileNameChars(), x => Regex.Escape(x.ToString()))) + "]+$";
    private static Regex fileNameValidator = new Regex(fileNameValidatorExpression, RegexOptions.Compiled);

    private static string pathCleanerExpression = "[" + string.Join("", Array.ConvertAll(Path.GetInvalidPathChars(), x => Regex.Escape(x.ToString()))) + "]";
    private static Regex pathCleaner = new Regex(pathCleanerExpression, RegexOptions.Compiled);

    private static string fileNameCleanerExpression = "[" + string.Join("", Array.ConvertAll(Path.GetInvalidFileNameChars(), x => Regex.Escape(x.ToString()))) + "]";
    private static Regex fileNameCleaner = new Regex(fileNameCleanerExpression, RegexOptions.Compiled);

    public static bool ValidatePath(string path)
    {
        return pathValidator.IsMatch(path);
    }

    public static bool ValidateFileName(string fileName)
    {
        return fileNameValidator.IsMatch(fileName);
    }

    public static string CleanPath(string path)
    {
        return pathCleaner.Replace(path, "");
    }

    public static string CleanFileName(string fileName)
    {
        return fileNameCleaner.Replace(fileName, "");
    }
}

其他回答

上面的大多数解决方案都将路径和文件名的非法字符组合在一起,这是错误的(即使两个调用当前返回相同的字符集)。我将首先在path和filename中分割path+filename,然后应用适当的集,如果它们,然后再次结合两者。

wvd_vegt

我使用正则表达式来实现这一点。首先,我动态地构建正则表达式。

string regex = string.Format(
                   "[{0}]",
                   Regex.Escape(new string(Path.GetInvalidFileNameChars())));
Regex removeInvalidChars = new Regex(regex, RegexOptions.Singleline | RegexOptions.Compiled | RegexOptions.CultureInvariant);

然后我调用removeInvalidChars。替换来做查找和替换。这显然也可以扩展到覆盖路径字符。

下面的代码片段应该对。net 3及更高版本有所帮助。

using System.IO;
using System.Text.RegularExpressions;

public static class PathValidation
{
    private static string pathValidatorExpression = "^[^" + string.Join("", Array.ConvertAll(Path.GetInvalidPathChars(), x => Regex.Escape(x.ToString()))) + "]+$";
    private static Regex pathValidator = new Regex(pathValidatorExpression, RegexOptions.Compiled);

    private static string fileNameValidatorExpression = "^[^" + string.Join("", Array.ConvertAll(Path.GetInvalidFileNameChars(), x => Regex.Escape(x.ToString()))) + "]+$";
    private static Regex fileNameValidator = new Regex(fileNameValidatorExpression, RegexOptions.Compiled);

    private static string pathCleanerExpression = "[" + string.Join("", Array.ConvertAll(Path.GetInvalidPathChars(), x => Regex.Escape(x.ToString()))) + "]";
    private static Regex pathCleaner = new Regex(pathCleanerExpression, RegexOptions.Compiled);

    private static string fileNameCleanerExpression = "[" + string.Join("", Array.ConvertAll(Path.GetInvalidFileNameChars(), x => Regex.Escape(x.ToString()))) + "]";
    private static Regex fileNameCleaner = new Regex(fileNameCleanerExpression, RegexOptions.Compiled);

    public static bool ValidatePath(string path)
    {
        return pathValidator.IsMatch(path);
    }

    public static bool ValidateFileName(string fileName)
    {
        return fileNameValidator.IsMatch(fileName);
    }

    public static string CleanPath(string path)
    {
        return pathCleaner.Replace(path, "");
    }

    public static string CleanFileName(string fileName)
    {
        return fileNameCleaner.Replace(fileName, "");
    }
}

我写这个怪物是为了好玩,它让你可以往返:

public static class FileUtility
{
    private const char PrefixChar = '%';
    private static readonly int MaxLength;
    private static readonly Dictionary<char,char[]> Illegals;
    static FileUtility()
    {
        List<char> illegal = new List<char> { PrefixChar };
        illegal.AddRange(Path.GetInvalidFileNameChars());
        MaxLength = illegal.Select(x => ((int)x).ToString().Length).Max();
        Illegals = illegal.ToDictionary(x => x, x => ((int)x).ToString("D" + MaxLength).ToCharArray());
    }

    public static string FilenameEncode(string s)
    {
        var builder = new StringBuilder();
        char[] replacement;
        using (var reader = new StringReader(s))
        {
            while (true)
            {
                int read = reader.Read();
                if (read == -1)
                    break;
                char c = (char)read;
                if(Illegals.TryGetValue(c,out replacement))
                {
                    builder.Append(PrefixChar);
                    builder.Append(replacement);
                }
                else
                {
                    builder.Append(c);
                }
            }
        }
        return builder.ToString();
    }

    public static string FilenameDecode(string s)
    {
        var builder = new StringBuilder();
        char[] buffer = new char[MaxLength];
        using (var reader = new StringReader(s))
        {
            while (true)
            {
                int read = reader.Read();
                if (read == -1)
                    break;
                char c = (char)read;
                if (c == PrefixChar)
                {
                    reader.Read(buffer, 0, MaxLength);
                    var encoded =(char) ParseCharArray(buffer);
                    builder.Append(encoded);
                }
                else
                {
                    builder.Append(c);
                }
            }
        }
        return builder.ToString();
    }

    public static int ParseCharArray(char[] buffer)
    {
        int result = 0;
        foreach (char t in buffer)
        {
            int digit = t - '0';
            if ((digit < 0) || (digit > 9))
            {
                throw new ArgumentException("Input string was not in the correct format");
            }
            result *= 10;
            result += digit;
        }
        return result;
    }
}

我已经滚动了我自己的方法,这似乎要快得多的其他张贴在这里(特别是正则表达式是如此缓慢),但我没有测试所有张贴的方法。

https://dotnetfiddle.net/haIXiY

第一个方法(我的)和第二个方法(也是我的,但旧的)也对反斜杠进行了额外的检查,所以基准测试并不完美,但无论如何,这只是给你一个想法。

在我的笔记本电脑上的结果(10万次迭代):

StringHelper.RemoveInvalidCharacters 1: 451 ms  
StringHelper.RemoveInvalidCharacters 2: 7139 ms  
StringHelper.RemoveInvalidCharacters 3: 2447 ms  
StringHelper.RemoveInvalidCharacters 4: 3733 ms  
StringHelper.RemoveInvalidCharacters 5: 11689 ms  (==> Regex!)

最快的方法:

public static string RemoveInvalidCharacters(string content, char replace = '_', bool doNotReplaceBackslashes = false)
{
    if (string.IsNullOrEmpty(content))
        return content;

    var idx = content.IndexOfAny(InvalidCharacters);
    if (idx >= 0)
    {
        var sb = new StringBuilder(content);
        while (idx >= 0)
        {
            if (sb[idx] != '\\' || !doNotReplaceBackslashes)
                sb[idx] = replace;
            idx = content.IndexOfAny(InvalidCharacters, idx+1);
        }
        return sb.ToString();
    }
    return content;
}

方法没有“按原样”编译InvalidCharacters属性,请检查fiddle是否有完整代码