在JavaScript中是否有一种方法来检查字符串是否是URL?

正则表达式被排除在外,因为URL很可能只写成 stackoverflow;也就是说,它可能没有 .com、www 或 http。


当前回答

我使用下面的函数来验证URL(无论是否带有 http/https):

/**
 * Reports whether `string` contains something shaped like a web URL.
 *
 * The scheme (`http://` / `https://`) and a leading `www.` are both optional;
 * what is required is a run of 2-256 host characters, a dot, and a 2-6
 * lowercase-letter suffix that ends on a word boundary. The pattern is not
 * anchored, so a URL-like substring anywhere in the input is enough.
 *
 * @param {string} string - Candidate text to inspect.
 * @returns {boolean} `true` if a URL-like substring is found; `false`
 *   otherwise, including when `string` is not a string at all.
 */
function isValidURL(string) {
  // Guard first: calling .match/.test on null or undefined input should
  // report "not a URL" rather than throw.
  if (typeof string !== 'string') {
    return false;
  }
  // No `g` flag: only existence of a match matters, and a non-global regex
  // keeps no `lastIndex` state between calls.
  const urlLike = /(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/;
  return urlLike.test(string);
}

// Demo calls. Each statement sits on its own line so that a trailing `//`
// comment cannot swallow the statements that follow it.
const testCase1 = "http://en.wikipedia.org/wiki/Procter_&_Gamble";
console.log(isValidURL(testCase1)); // true

const testCase2 = "http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707";
console.log(isValidURL(testCase2)); // true

const testCase3 = "https://sdfasd";
console.log(isValidURL(testCase3)); // false

const testCase4 = "dfdsfdsfdfdsfsdfs";
console.log(isValidURL(testCase4)); // false

const testCase5 = "magnet:?xt=urn:btih:123";
console.log(isValidURL(testCase5)); // false

const testCase6 = "https://stackoverflow.com/";
console.log(isValidURL(testCase6)); // true

const testCase7 = "https://w";
console.log(isValidURL(testCase7)); // false

const testCase8 = "https://sdfasdp.ppppppppppp";
console.log(isValidURL(testCase8)); // false

其他回答

在我的情况下,我唯一的要求是:用户输入被放到 a 标签的 href 中时,不会被解释为相对链接。这里的答案要么有点过头(OTT),要么允许了不符合我要求的 URL,所以我采用的是:

^https?://.+$

没有正则表达式,同样的事情也可以很容易地实现。

As has been noted the perfect regex is elusive but still seems to be a reasonable approach (alternatives are server side tests or the new experimental URL API). However the high ranking answers are often returning false for common URLs but even worse will freeze your app/page for minutes on even as simple a string as isURL('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'). It's been pointed out in some of the comments, but most probably haven't entered a bad value to see it. Hanging like that makes that code unusable in any serious application. I think it's due to the repeated case insensitive sets in code like ((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.?)+[a-z]{2,}|' .... Take out the 'i' and it doesn't hang but will of course not work as desired. But even with the ignore case flag those tests reject high unicode values that are allowed.

已经提到的最好的是:

/**
 * Checks whether `str` looks like a URL.
 *
 * Accepted shape: an optional `scheme:` prefix, a mandatory `//`, then either
 * a dotted host (non-space, non-dot characters, a dot, and at least two more
 * non-space characters) or `localhost` with an optional port-like tail,
 * followed by any run of non-whitespace characters to the end of the input.
 *
 * @param {string} str - Candidate text.
 * @returns {boolean} `true` when the whole input matches the shape above.
 */
function isURL(str) {
  const urlShape = /^(?:\w+:)?\/\/([^\s\.]+\.\S{2}|localhost[\:?\d]*)\S*$/;
  const looksLikeUrl = urlShape.test(str);
  return looksLikeUrl;
}

That comes from Github segmentio/is-url. The good thing about a code repository is you can see the testing and any issues and also the test strings run through it. There's a branch that would allow strings missing protocol like google.com, though you're probably making too many assumptions then. The repository has been updated and I'm not planning on trying to keep up a mirror here. It's been broken up into separate tests to avoid RegEx redos which can be exploited for DOS attacks (I don't think you have to worry about that with client side js, but you do have to worry about your page hanging for so long that your visitor leaves your site).

我还见过另一个存储库 dperini/regex-weburl.js,它可能更适合 isURL,但非常复杂。它有一个更大的有效和无效 URL 的测试列表。上面那个简单的正则仍然通过了所有正例,只是没能拦住少数奇怪的反例,例如 http://a.b--c.de/ 以及特殊的 IP。

无论你选择哪一个,都可以在浏览器的开发者工具控制台中用下面这个函数来检验它;该函数是我根据 dperini/regex-weburl.js 中的测试改编的。

/**
 * Console smoke test for whichever `isURL` implementation is in scope.
 *
 * Feeds a fixed list of strings through `isURL` and reports each unexpected
 * result with `console.assert`, so failures show up in the console without
 * throwing. The accepted list is checked first, then the rejected list, in
 * the order given below.
 */
function testIsURL() {
  // Inputs for which isURL is expected to return a truthy value.
  const shouldMatch = [
    "http://foo.com/blah_blah",
    "http://foo.com/blah_blah/",
    "http://foo.com/blah_blah_(wikipedia)",
    "http://foo.com/blah_blah_(wikipedia)_(again)",
    "http://www.example.com/wpstyle/?p=364",
    "https://www.example.com/foo/?bar=baz&inga=42&quux",
    "http://✪df.ws/123",
    "http://userid:password@example.com:8080",
    "http://userid:password@example.com:8080/",
    "http://userid@example.com",
    "http://userid@example.com/",
    "http://userid@example.com:8080",
    "http://userid@example.com:8080/",
    "http://userid:password@example.com",
    "http://userid:password@example.com/",
    "http://142.42.1.1/",
    "http://142.42.1.1:8080/",
    "http://➡.ws/䨹",
    "http://⌘.ws",
    "http://⌘.ws/",
    "http://foo.com/blah_(wikipedia)#cite-1",
    "http://foo.com/blah_(wikipedia)_blah#cite-1",
    "http://foo.com/unicode_(✪)_in_parens",
    "http://foo.com/(something)?after=parens",
    "http://☺.damowmow.com/",
    "http://code.google.com/events/#&product=browser",
    "http://j.mp",
    "ftp://foo.bar/baz",
    "http://foo.bar/?q=Test%20URL-encoded%20stuff",
    "http://مثال.إختبار",
    "http://例子.测试",
    "http://उदाहरण.परीक्षा",
    "http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com",
    "http://1337.net",
    "http://a.b-c.de",
    "http://223.255.255.254",
    "postgres://u:p@example.com:5702/db",
    "https://d1f4470da51b49289906b3d6cbd65074@app.getsentry.com/13176",
  ];

  // Inputs for which isURL is expected to return a falsy value.
  const shouldNotMatch = [
    "http://",
    "http://.",
    "http://..",
    "http://../",
    "http://?",
    "http://??",
    "http://??/",
    "http://#",
    "http://##",
    "http://##/",
    "http://foo.bar?q=Spaces should be encoded",
    "//",
    "//a",
    "///a",
    "///",
    "http:///a",
    "foo.com",
    "rdar://1234",
    "h://test",
    "http:// shouldfail.com",
    ":// should fail",
    "http://foo.bar/foo(bar)baz quux",
    "ftps://foo.bar/",
    "http://-error-.invalid/",
    "http://a.b--c.de/",
    "http://-a.b.co",
    "http://a.b-.co",
    "http://0.0.0.0",
    "http://10.1.1.0",
    "http://10.1.1.255",
    "http://224.1.1.1",
    "http://1.1.1.1.1",
    "http://123.123.123",
    "http://3628126748",
    "http://.www.foo.bar/",
    "http://www.foo.bar./",
    "http://.www.foo.bar./",
    "http://10.1.1.1",
  ];

  for (const candidate of shouldMatch) {
    console.assert(isURL(candidate));
  }
  for (const candidate of shouldNotMatch) {
    console.assert(!isURL(candidate));
  }
}

然后测试这串a。

在你发布一个看起来很棒的正则表达式之前,看看Mathias Bynens对isURL正则表达式的比较,了解更多信息。

我把函数改为使用 match,并修改了其中斜杠的写法,现在可以正常工作(http:// 和 https:// 均可):

/**
 * Reports whether `userInput` contains a URL-like substring.
 *
 * The scheme (`http://` / `https://`) and a leading `www.` are optional; the
 * input must contain 2-256 host characters, a dot, and a 2-6 lowercase-letter
 * suffix ending on a word boundary. The pattern is unanchored, so a match
 * anywhere in the input counts.
 *
 * @param {string} userInput - Text to inspect; must provide a `match` method.
 * @returns {boolean} `true` if at least one URL-like substring is found.
 */
function isValidUrl(userInput) {
    const urlMatches = userInput.match(/(http(s)?:\/\/.)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g);
    // String#match yields null when nothing matched, otherwise an array.
    return urlMatches !== null;
}

使用纯正则表达式很难做到这一点,因为url有很多“不方便”的地方。

For example domain names have complicated restrictions on hyphens: a. It is allowed to have many consecutive hyphens in the middle. b. but the first character and last character of the domain name cannot be a hyphen c. The 3rd and 4th character cannot be both hyphen Similarly port number can only be in the range 1-65535. This is easy to check if you extract the port part and convert to int but quite difficult to check with a regular expression. There is also no easy way to check valid domain extensions. Some countries have second-level domains(such as 'co.uk'), or the extension can be a long word such as '.international'. And new TLDs are added regularly. This type of things can only be checked against a hard-coded list. (see https://en.wikipedia.org/wiki/Top-level_domain) Then there are magnet urls, ftp addresses etc. These all have different requirements.

然而,这里有一个函数可以处理几乎所有的事情,除了:

情况 1.c(第 3、4 个字符不能同时为连字符);它接受任何 1–5 位数字的端口号;它接受任何 2–13 个字符的扩展名;它不接受 ftp、magnet 等……

// Compiled once at load time instead of on every call.
//
// Two sub-patterns are written so that each input can be matched in only one
// way, which avoids catastrophic backtracking on long non-matching input:
//   * a host label is "alnum, then optionally up to 61 alnum/hyphen chars and
//     a final alnum" (so 1-63 characters, no leading or trailing hyphen);
//   * a path segment's character class does not contain '/', because '/' is
//     already consumed as the start of each repeated segment.
const URL_PATTERN = new RegExp(
  '^(https?:\\/\\/)?' + // protocol
  '((([a-zA-Z\\d]([a-zA-Z\\d-]{0,61}[a-zA-Z\\d])?\\.)+' + // sub-domain + domain name
  '[a-zA-Z]{2,13})' + // extension
  '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address
  '|localhost)' + // OR localhost
  '(\\:\\d{1,5})?' + // port
  '(\\/[a-zA-Z\\&\\d%_.,~+\\-:@]*)*' + // path
  '(\\?[a-zA-Z\\&\\d%_.,~+-:@=;&]*)?' + // query string
  '(\\#[-a-zA-Z&\\d_]*)?$' // fragment locator
);

/**
 * Checks whether `input` is, in its entirety, an http(s)-style URL.
 *
 * Accepted: an optional `http://` or `https://` prefix; a host that is a
 * dotted domain name ending in a 2-13 letter extension, a dotted quad of
 * 1-3 digit groups, or `localhost`; then an optional `:port` of 1-5 digits,
 * path, query string and fragment.
 *
 * Known limitations: the port's numeric range and each IPv4 octet's range are
 * not checked, the extension is not checked against a list of real TLDs,
 * hyphens in the 3rd and 4th positions of a label are not rejected, and other
 * schemes (ftp, magnet, ...) are not accepted.
 *
 * @param {string} input - Candidate text.
 * @returns {boolean} `true` when the whole input matches the pattern.
 */
function isValidURL(input) {
  return URL_PATTERN.test(input);
}

// Table of [input, expected] pairs. Some expectations describe ideal behavior
// that the pattern does not implement (see the limitations listed on
// isValidURL); those rows are logged as ' failed' by the loop below.
const tests = [
  ['', false],
  ['http://en.wikipedia.org/wiki/Procter_&_Gamble', true],
  ['https://sdfasd', false],
  ['http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707', true],
  ['https://stackoverflow.com/', true],
  ['https://w', false],
  ['aaa', false],
  ['aaaa', false],
  ['oh.my', true],
  ['dfdsfdsfdfdsfsdfs', false],
  ['google.co.uk', true],
  ['test-domain.MUSEUM', true],
  ['-hyphen-start.gov.tr', false],
  ['hyphen-end-.com', false],
  ['https://sdfasdp.international', true],
  ['https://sdfasdp.pppppppp', false],
  ['https://sdfasdp.ppppppppppppppppppp', false],
  ['https://sdfasd', false],
  ['https://sub1.1234.sub3.sub4.sub5.co.uk/?', true],
  ['http://www.google-com.123', false],
  ['http://my--testdomain.com', false],
  ['http://my2nd--testdomain.com', true],
  ['http://thingiverse.com/download:1894343', true],
  ['https://medium.com/@techytimo', true],
  ['http://localhost', true],
  ['localhost', true],
  ['localhost:8080', true],
  ['localhost:65536', true],
  ['localhost:80000', false],
  ['magnet:?xt=urn:btih:123', true],
];

for (let i = 0; i < tests.length; i++) {
  const [candidate, expected] = tests[i];
  const verdict = isValidURL(candidate) === expected ? ' passed' : ' failed';
  console.log(`Test #${i}${verdict} on ["${candidate}", ${expected}]`);
}

/**
 * Validates a URL by delegating to the host environment's form validation.
 *
 * On first use, an `<input>` element with `type = "url"` and
 * `required = true` is created and cached on `window._check_input`; later
 * calls reuse it. The candidate is assigned to the element's `value` and the
 * element's `checkValidity()` result decides the outcome.
 *
 * @param {string} _url - Candidate text.
 * @returns {boolean} `true` when `checkValidity()` returns a truthy value.
 */
function isURL(_url)
{
    const host = window;

    // Lazily build the probe element once and keep it on the window object.
    if (!host._check_input)
    {
        const probe = document.createElement("input");
        probe.type = "url";
        probe.required = true;

        host._check_input = probe;
    }

    host._check_input.value = _url;

    return Boolean(host._check_input.checkValidity());
}