在JavaScript中是否有一种方法来检查字符串是否是URL?

regex被排除在外,因为URL很可能写成stackoverflow;也就是说,它可能没有。com, WWW或http。


当前回答

使用纯正则表达式很难做到这一点,因为url有很多“不方便”的地方。

For example domain names have complicated restrictions on hyphens: a. It is allowed to have many consecutive hyphens in the middle. b. but the first character and last character of the domain name cannot be a hyphen c. The 3rd and 4th character cannot be both hyphen Similarly port number can only be in the range 1-65535. This is easy to check if you extract the port part and convert to int but quite difficult to check with a regular expression. There is also no easy way to check valid domain extensions. Some countries have second-level domains(such as 'co.uk'), or the extension can be a long word such as '.international'. And new TLDs are added regularly. This type of things can only be checked against a hard-coded list. (see https://en.wikipedia.org/wiki/Top-level_domain) Then there are magnet urls, ftp addresses etc. These all have different requirements.

然而,这里有一个函数可以处理几乎所有的事情,除了:

案例1。c 接受任何1-5位数的端口号 接受任何扩展2-13个字符 不接受ftp,磁铁等…

function isValidURL(input) { pattern = '^(https?:\\/\\/)?' + // protocol '((([a-zA-Z\\d]([a-zA-Z\\d-]{0,61}[a-zA-Z\\d])*\\.)+' + // sub-domain + domain name '[a-zA-Z]{2,13})' + // extension '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address '|localhost)' + // OR localhost '(\\:\\d{1,5})?' + // port '(\\/[a-zA-Z\\&\\d%_.~+-:@]*)*' + // path '(\\?[a-zA-Z\\&\\d%_.,~+-:@=;&]*)?' + // query string '(\\#[-a-zA-Z&\\d_]*)?$'; // fragment locator regex = new RegExp(pattern); return regex.test(input); } let tests = []; tests.push(['', false]); tests.push(['http://en.wikipedia.org/wiki/Procter_&_Gamble', true]); tests.push(['https://sdfasd', false]); tests.push(['http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707', true]); tests.push(['https://stackoverflow.com/', true]); tests.push(['https://w', false]); tests.push(['aaa', false]); tests.push(['aaaa', false]); tests.push(['oh.my', true]); tests.push(['dfdsfdsfdfdsfsdfs', false]); tests.push(['google.co.uk', true]); tests.push(['test-domain.MUSEUM', true]); tests.push(['-hyphen-start.gov.tr', false]); tests.push(['hyphen-end-.com', false]); tests.push(['https://sdfasdp.international', true]); tests.push(['https://sdfasdp.pppppppp', false]); tests.push(['https://sdfasdp.ppppppppppppppppppp', false]); tests.push(['https://sdfasd', false]); tests.push(['https://sub1.1234.sub3.sub4.sub5.co.uk/?', true]); tests.push(['http://www.google-com.123', false]); tests.push(['http://my--testdomain.com', false]); tests.push(['http://my2nd--testdomain.com', true]); tests.push(['http://thingiverse.com/download:1894343', true]); tests.push(['https://medium.com/@techytimo', true]); tests.push(['http://localhost', true]); tests.push(['localhost', true]); tests.push(['localhost:8080', true]); tests.push(['localhost:65536', true]); tests.push(['localhost:80000', false]); tests.push(['magnet:?xt=urn:btih:123', true]); for (let i = 0; i < tests.length; i++) { console.log('Test #' + i + (isValidURL(tests[i][0]) == tests[i][1] ? ' passed' : ' failed') + ' on ["' + tests[i][0] + '", ' + tests[i][1] + ']'); }

其他回答

使用纯正则表达式很难做到这一点,因为url有很多“不方便”的地方。

For example domain names have complicated restrictions on hyphens: a. It is allowed to have many consecutive hyphens in the middle. b. but the first character and last character of the domain name cannot be a hyphen c. The 3rd and 4th character cannot be both hyphen Similarly port number can only be in the range 1-65535. This is easy to check if you extract the port part and convert to int but quite difficult to check with a regular expression. There is also no easy way to check valid domain extensions. Some countries have second-level domains(such as 'co.uk'), or the extension can be a long word such as '.international'. And new TLDs are added regularly. This type of things can only be checked against a hard-coded list. (see https://en.wikipedia.org/wiki/Top-level_domain) Then there are magnet urls, ftp addresses etc. These all have different requirements.

然而,这里有一个函数可以处理几乎所有的事情,除了:

案例1。c 接受任何1-5位数的端口号 接受任何扩展2-13个字符 不接受ftp,磁铁等…

function isValidURL(input) { pattern = '^(https?:\\/\\/)?' + // protocol '((([a-zA-Z\\d]([a-zA-Z\\d-]{0,61}[a-zA-Z\\d])*\\.)+' + // sub-domain + domain name '[a-zA-Z]{2,13})' + // extension '|((\\d{1,3}\\.){3}\\d{1,3})' + // OR ip (v4) address '|localhost)' + // OR localhost '(\\:\\d{1,5})?' + // port '(\\/[a-zA-Z\\&\\d%_.~+-:@]*)*' + // path '(\\?[a-zA-Z\\&\\d%_.,~+-:@=;&]*)?' + // query string '(\\#[-a-zA-Z&\\d_]*)?$'; // fragment locator regex = new RegExp(pattern); return regex.test(input); } let tests = []; tests.push(['', false]); tests.push(['http://en.wikipedia.org/wiki/Procter_&_Gamble', true]); tests.push(['https://sdfasd', false]); tests.push(['http://www.google.com/url?sa=i&rct=j&q=&esrc=s&source=images&cd=&docid=nIv5rk2GyP3hXM&tbnid=isiOkMe3nCtexM:&ved=0CAUQjRw&url=http%3A%2F%2Fanimalcrossing.wikia.com%2Fwiki%2FLion&ei=ygZXU_2fGKbMsQTf4YLgAQ&bvm=bv.65177938,d.aWc&psig=AFQjCNEpBfKnal9kU7Zu4n7RnEt2nerN4g&ust=1398298682009707', true]); tests.push(['https://stackoverflow.com/', true]); tests.push(['https://w', false]); tests.push(['aaa', false]); tests.push(['aaaa', false]); tests.push(['oh.my', true]); tests.push(['dfdsfdsfdfdsfsdfs', false]); tests.push(['google.co.uk', true]); tests.push(['test-domain.MUSEUM', true]); tests.push(['-hyphen-start.gov.tr', false]); tests.push(['hyphen-end-.com', false]); tests.push(['https://sdfasdp.international', true]); tests.push(['https://sdfasdp.pppppppp', false]); tests.push(['https://sdfasdp.ppppppppppppppppppp', false]); tests.push(['https://sdfasd', false]); tests.push(['https://sub1.1234.sub3.sub4.sub5.co.uk/?', true]); tests.push(['http://www.google-com.123', false]); tests.push(['http://my--testdomain.com', false]); tests.push(['http://my2nd--testdomain.com', true]); tests.push(['http://thingiverse.com/download:1894343', true]); tests.push(['https://medium.com/@techytimo', true]); tests.push(['http://localhost', true]); tests.push(['localhost', true]); tests.push(['localhost:8080', true]); tests.push(['localhost:65536', true]); tests.push(['localhost:80000', false]); tests.push(['magnet:?xt=urn:btih:123', true]); for (let i = 0; i < tests.length; i++) { console.log('Test #' + i + (isValidURL(tests[i][0]) == tests[i][1] ? ' passed' : ' failed') + ' on ["' + tests[i][0] + '", ' + tests[i][1] + ']'); }

这似乎是CS中最难的问题之一;)

这是另一个不完整的解决方案,它对我来说足够好,比我在这里看到的其他解决方案更好。为了支持IE11,我使用了一个输入[type=url],否则使用window会简单得多。URL来执行验证:

const ipv4Regex = /^(\d{1,3}\.){3}\d{1,3}$/; function isValidIpv4(ip) { if (!ipv4Regex.test(ip)) return false; return !ip.split('.').find(n => n > 255); } const domainRegex = /(?:[a-z0-9-]{1,63}\.){1,125}[a-z]{2,63}$/i; function isValidDomain(domain) { return isValidIpv4(domain) || domainRegex.test(domain); } let input; function validateUrl(url) { if (! /^https?:\/\//.test(url)) url = `http://${url}`; // assuming Babel is used // to support IE11 we'll resort to input[type=url] instead of window.URL: // try { return isValidDomain(new URL(url).host) && url; } catch(e) { return false; } if (!input) { input = document.createElement('input'); input.type = 'url'; } input.value = url; if (! input.validity.valid) return false; const domain = url.split(/^https?:\/\//)[1].split('/')[0].split('@').pop(); return isValidDomain(domain) && url; } console.log(validateUrl('google'), // false validateUrl('user:pw@mydomain.com'), validateUrl('https://google.com'), validateUrl('100.100.100.100/abc'), validateUrl('100.100.100.256/abc')); // false

为了接受不完整的输入,例如“www.mydomain.com”,它还将使其有效,假设在这些情况下协议是“http”,如果地址有效,则返回有效的URL。无效时返回false。

它还支持IPv4域,但不支持IPv6域。

这里还有另一种方法。

// ***note***: if the incoming value is empty(""), the function returns true var elm; function isValidURL(u){ //A precaution/solution for the problem written in the ***note*** if(u!==""){ if(!elm){ elm = document.createElement('input'); elm.setAttribute('type', 'url'); } elm.value = u; return elm.validity.valid; } else{ return false } } console.log(isValidURL('')); console.log(isValidURL('http://www.google.com/')); console.log(isValidURL('//google.com')); console.log(isValidURL('google.com')); console.log(isValidURL('localhost:8000'));

您可以使用ajax请求来检查字符串是否有效的url和可访问的

(function() { $("input").change(function() { const check = $.ajax({ url : this.value, dataType: "jsonp" }); check.then(function() { console.log("Site is valid and registered"); }); //expected output check.catch(function(reason) { if(reason.status === 200) { return console.log("Site is valid and registered"); } console.log("Not a valid site"); }) }); })() <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script> <input type="text" placeholder="Please input url to check ? ">

在我的情况下,我唯一的要求是,用户输入不会被解释为一个相对链接时,放置在一个标签的href和这里的答案要么有点OTT,要么允许url不符合我的要求,所以这就是我要做的:

^https?://.+$

没有正则表达式,同样的事情也可以很容易地实现。