如果我像这样编码一个字符串:

var escapedString = originalString.stringByAddingPercentEscapesUsingEncoding(NSUTF8StringEncoding)

它没有逃脱斜杠/。

我搜索并找到了这段Objective C代码:

NSString *encodedString = (NSString *)CFURLCreateStringByAddingPercentEscapes(
                        NULL,
                        (CFStringRef)unencodedString,
                        NULL,
                        (CFStringRef)@"!*'();:@&=+$,/?%#[]",
                        kCFStringEncodingUTF8 );

是否有一个更简单的方法来编码一个URL,如果没有,我怎么写在Swift?


当前回答

这在Swift 5中为我工作。用例是从剪贴板或类似的地方获取URL,该URL可能已经有转义字符,但还包含Unicode字符,这可能导致URLComponents或URL(string:)失败。

首先,创建一个包含所有url合法字符的字符集:

extension CharacterSet {

    /// Characters valid in at least one part of a URL.
    ///
    /// These characters are not allowed in ALL parts of a URL; each part has different requirements. This set is useful for checking for Unicode characters that need to be percent encoded before performing a validity check on individual URL components.
    static var urlAllowedCharacters: CharacterSet {
        // Start by including hash, which isn't in any set
        var characters = CharacterSet(charactersIn: "#")
        // All URL-legal characters
        characters.formUnion(.urlUserAllowed)
        characters.formUnion(.urlPasswordAllowed)
        characters.formUnion(.urlHostAllowed)
        characters.formUnion(.urlPathAllowed)
        characters.formUnion(.urlQueryAllowed)
        characters.formUnion(.urlFragmentAllowed)

        return characters
    }
}

接下来,用一个方法扩展String来编码url:

extension String {

    /// Converts a string to a percent-encoded URL, including Unicode characters.
    ///
    /// - Returns: An encoded URL if all steps succeed, otherwise nil.
    func encodedUrl() -> URL? {        
        // Remove preexisting encoding,
        guard let decodedString = self.removingPercentEncoding,
            // encode any Unicode characters so URLComponents doesn't choke,
            let unicodeEncodedString = decodedString.addingPercentEncoding(withAllowedCharacters: .urlAllowedCharacters),
            // break into components to use proper encoding for each part,
            let components = URLComponents(string: unicodeEncodedString),
            // and reencode, to revert decoding while encoding missed characters.
            let percentEncodedUrl = components.url else {
            // Encoding failed
            return nil
        }

        return percentEncodedUrl
    }

}

可以这样测试:

let urlText = "https://www.example.com/폴더/search?q=123&foo=bar&multi=eggs+and+ham&hangul=한글&spaced=lovely%20spam&illegal=<>#top"
let url = encodedUrl(from: urlText)

url最后的值:https://www.example.com/%ED%8F%B4%EB%8D%94/search?q=123&foo=bar&multi=eggs+and+ham&hangul=%ED%95%9C%EA%B8%80&spaced=lovely%20spam&illegal=%3C%3E#top

注意,%20和+空格都被保留,Unicode字符被编码,原始urlText中的%20没有被双重编码,锚(片段或#)仍然保留。

编辑:现在检查每个组件的有效性。

其他回答

你可以使用URLComponents来避免手动对查询字符串进行百分比编码:

let scheme = "https"
let host = "www.google.com"
let path = "/search"
let queryItem = URLQueryItem(name: "q", value: "Formula One")


var urlComponents = URLComponents()
urlComponents.scheme = scheme
urlComponents.host = host
urlComponents.path = path
urlComponents.queryItems = [queryItem]

if let url = urlComponents.url {
    print(url)   // "https://www.google.com/search?q=Formula%20One"
}

extension URLComponents {
    init(scheme: String = "https",
         host: String = "www.google.com",
         path: String = "/search",
         queryItems: [URLQueryItem]) {
        self.init()
        self.scheme = scheme
        self.host = host
        self.path = path
        self.queryItems = queryItems
    }
}

let query = "Formula One"
if let url = URLComponents(queryItems: [URLQueryItem(name: "q", value: query)]).url {
    print(url)  // https://www.google.com/search?q=Formula%20One
}

一切都是一样的

var str = CFURLCreateStringByAddingPercentEscapes(
    nil,
    "test/test",
    nil,
    "!*'();:@&=+$,/?%#[]",
    CFStringBuiltInEncodings.UTF8.rawValue
)

// test%2Ftest

这在Swift 5中为我工作。用例是从剪贴板或类似的地方获取URL,该URL可能已经有转义字符,但还包含Unicode字符,这可能导致URLComponents或URL(string:)失败。

首先,创建一个包含所有url合法字符的字符集:

extension CharacterSet {

    /// Characters valid in at least one part of a URL.
    ///
    /// These characters are not allowed in ALL parts of a URL; each part has different requirements. This set is useful for checking for Unicode characters that need to be percent encoded before performing a validity check on individual URL components.
    static var urlAllowedCharacters: CharacterSet {
        // Start by including hash, which isn't in any set
        var characters = CharacterSet(charactersIn: "#")
        // All URL-legal characters
        characters.formUnion(.urlUserAllowed)
        characters.formUnion(.urlPasswordAllowed)
        characters.formUnion(.urlHostAllowed)
        characters.formUnion(.urlPathAllowed)
        characters.formUnion(.urlQueryAllowed)
        characters.formUnion(.urlFragmentAllowed)

        return characters
    }
}

接下来,用一个方法扩展String来编码url:

extension String {

    /// Converts a string to a percent-encoded URL, including Unicode characters.
    ///
    /// - Returns: An encoded URL if all steps succeed, otherwise nil.
    func encodedUrl() -> URL? {        
        // Remove preexisting encoding,
        guard let decodedString = self.removingPercentEncoding,
            // encode any Unicode characters so URLComponents doesn't choke,
            let unicodeEncodedString = decodedString.addingPercentEncoding(withAllowedCharacters: .urlAllowedCharacters),
            // break into components to use proper encoding for each part,
            let components = URLComponents(string: unicodeEncodedString),
            // and reencode, to revert decoding while encoding missed characters.
            let percentEncodedUrl = components.url else {
            // Encoding failed
            return nil
        }

        return percentEncodedUrl
    }

}

可以这样测试:

let urlText = "https://www.example.com/폴더/search?q=123&foo=bar&multi=eggs+and+ham&hangul=한글&spaced=lovely%20spam&illegal=<>#top"
let url = encodedUrl(from: urlText)

url最后的值:https://www.example.com/%ED%8F%B4%EB%8D%94/search?q=123&foo=bar&multi=eggs+and+ham&hangul=%ED%95%9C%EA%B8%80&spaced=lovely%20spam&illegal=%3C%3E#top

注意,%20和+空格都被保留,Unicode字符被编码,原始urlText中的%20没有被双重编码,锚(片段或#)仍然保留。

编辑:现在检查每个组件的有效性。

帮助我的是,我创建了一个单独的NSCharacterSet,并在一个UTF-8编码的字符串上使用它,即textToEncode来生成所需的结果:

var queryCharSet = NSCharacterSet.urlQueryAllowed
queryCharSet.remove(charactersIn: "+&?,:;@+=$*()")
    
let utfedCharacterSet = String(utf8String: textToEncode.cString(using: .utf8)!)!
let encodedStr = utfedCharacterSet.addingPercentEncoding(withAllowedCharacters: queryCharSet)!
    
let paramUrl = "https://api.abc.eu/api/search?device=true&query=\(escapedStr)"

斯威夫特3:

let originalString = "http://www.ihtc.cc?name=htc&title=iOS开发工程师"

1. encodingQuery:

let escapedString = originalString.addingPercentEncoding(withAllowedCharacters:NSCharacterSet.urlQueryAllowed)

结果:

"http://www.ihtc.cc?name=htc&title=iOS%E5%BC%80%E5%8F%91%E5%B7%A5%E7%A8%8B%E5%B8%88" 

2. 编码网址:

let escapedString = originalString.addingPercentEncoding(withAllowedCharacters: .urlHostAllowed)

结果:

"http:%2F%2Fwww.ihtc.cc%3Fname=htc&title=iOS%E5%BC%80%E5%8F%91%E5%B7%A5%E7%A8%8B%E5%B8%88"