什么是一个好的完整正则表达式或其他一些过程,将采取标题:
如何将标题更改为URL的一部分,如堆栈溢出?
然后把它变成
how-do-you-change-a-title-to-be-part-of-the-url-like-stack-overflow
在堆栈溢出的seo友好的url中使用?
我使用的开发环境是Ruby on Rails,但是如果有一些其他特定于平台的解决方案(。NET, PHP, Django),我也很想看到这些。
我相信我(或其他读者)在不同的平台上也会遇到同样的问题。
我使用自定义路由,我主要想知道如何改变字符串的所有特殊字符被删除,它都是小写的,所有空白被替换。
我将代码移植到TypeScript中。它可以很容易地适应JavaScript。
我添加了一个.contains方法到字符串原型,如果你的目标是最新的浏览器或ES6,你可以使用.includes代替。
if (!String.prototype.contains) {
String.prototype.contains = function (check) {
return this.indexOf(check, 0) !== -1;
};
}
declare interface String {
contains(check: string): boolean;
}
export function MakeUrlFriendly(title: string) {
if (title == null || title == '')
return '';
const maxlen = 80;
let len = title.length;
let prevdash = false;
let result = '';
let c: string;
let cc: number;
let remapInternationalCharToAscii = function (c: string) {
let s = c.toLowerCase();
if ("àåáâäãåą".contains(s)) {
return "a";
}
else if ("èéêëę".contains(s)) {
return "e";
}
else if ("ìíîïı".contains(s)) {
return "i";
}
else if ("òóôõöøőð".contains(s)) {
return "o";
}
else if ("ùúûüŭů".contains(s)) {
return "u";
}
else if ("çćčĉ".contains(s)) {
return "c";
}
else if ("żźž".contains(s)) {
return "z";
}
else if ("śşšŝ".contains(s)) {
return "s";
}
else if ("ñń".contains(s)) {
return "n";
}
else if ("ýÿ".contains(s)) {
return "y";
}
else if ("ğĝ".contains(s)) {
return "g";
}
else if (c == 'ř') {
return "r";
}
else if (c == 'ł') {
return "l";
}
else if (c == 'đ') {
return "d";
}
else if (c == 'ß') {
return "ss";
}
else if (c == 'Þ') {
return "th";
}
else if (c == 'ĥ') {
return "h";
}
else if (c == 'ĵ') {
return "j";
}
else {
return "";
}
};
for (let i = 0; i < len; i++) {
c = title[i];
cc = c.charCodeAt(0);
if ((cc >= 97 /* a */ && cc <= 122 /* z */) || (cc >= 48 /* 0 */ && cc <= 57 /* 9 */)) {
result += c;
prevdash = false;
}
else if ((cc >= 65 && cc <= 90 /* A - Z */)) {
result += c.toLowerCase();
prevdash = false;
}
else if (c == ' ' || c == ',' || c == '.' || c == '/' || c == '\\' || c == '-' || c == '_' || c == '=') {
if (!prevdash && result.length > 0) {
result += '-';
prevdash = true;
}
}
else if (cc >= 128) {
let prevlen = result.length;
result += remapInternationalCharToAscii(c);
if (prevlen != result.length) prevdash = false;
}
if (i == maxlen) break;
}
if (prevdash)
return result.substring(0, result.length - 1);
else
return result;
}
我喜欢这种不使用正则表达式的方式,所以我将其移植到PHP。我刚刚添加了一个名为is_between的函数来检查字符:
function is_between($val, $min, $max)
{
$val = (int) $val; $min = (int) $min; $max = (int) $max;
return ($val >= $min && $val <= $max);
}
function international_char_to_ascii($char)
{
if (mb_strpos('àåáâäãåa', $char) !== false)
{
return 'a';
}
if (mb_strpos('èéêëe', $char) !== false)
{
return 'e';
}
if (mb_strpos('ìíîïi', $char) !== false)
{
return 'i';
}
if (mb_strpos('òóôõö', $char) !== false)
{
return 'o';
}
if (mb_strpos('ùúûüuu', $char) !== false)
{
return 'u';
}
if (mb_strpos('çccc', $char) !== false)
{
return 'c';
}
if (mb_strpos('zzž', $char) !== false)
{
return 'z';
}
if (mb_strpos('ssšs', $char) !== false)
{
return 's';
}
if (mb_strpos('ñn', $char) !== false)
{
return 'n';
}
if (mb_strpos('ýÿ', $char) !== false)
{
return 'y';
}
if (mb_strpos('gg', $char) !== false)
{
return 'g';
}
if (mb_strpos('r', $char) !== false)
{
return 'r';
}
if (mb_strpos('l', $char) !== false)
{
return 'l';
}
if (mb_strpos('d', $char) !== false)
{
return 'd';
}
if (mb_strpos('ß', $char) !== false)
{
return 'ss';
}
if (mb_strpos('Þ', $char) !== false)
{
return 'th';
}
if (mb_strpos('h', $char) !== false)
{
return 'h';
}
if (mb_strpos('j', $char) !== false)
{
return 'j';
}
return '';
}
function url_friendly_title($url_title)
{
if (empty($url_title))
{
return '';
}
$url_title = mb_strtolower($url_title);
$url_title_max_length = 80;
$url_title_length = mb_strlen($url_title);
$url_title_friendly = '';
$url_title_dash_added = false;
$url_title_char = '';
for ($i = 0; $i < $url_title_length; $i++)
{
$url_title_char = mb_substr($url_title, $i, 1);
if (strlen($url_title_char) == 2)
{
$url_title_ascii = ord($url_title_char[0]) * 256 + ord($url_title_char[1]) . "\r\n";
}
else
{
$url_title_ascii = ord($url_title_char);
}
if (is_between($url_title_ascii, 97, 122) || is_between($url_title_ascii, 48, 57))
{
$url_title_friendly .= $url_title_char;
$url_title_dash_added = false;
}
elseif(is_between($url_title_ascii, 65, 90))
{
$url_title_friendly .= chr(($url_title_ascii | 32));
$url_title_dash_added = false;
}
elseif($url_title_ascii == 32 || $url_title_ascii == 44 || $url_title_ascii == 46 || $url_title_ascii == 47 || $url_title_ascii == 92 || $url_title_ascii == 45 || $url_title_ascii == 47 || $url_title_ascii == 95 || $url_title_ascii == 61)
{
if (!$url_title_dash_added && mb_strlen($url_title_friendly) > 0)
{
$url_title_friendly .= chr(45);
$url_title_dash_added = true;
}
}
else if ($url_title_ascii >= 128)
{
$url_title_previous_length = mb_strlen($url_title_friendly);
$url_title_friendly .= international_char_to_ascii($url_title_char);
if ($url_title_previous_length != mb_strlen($url_title_friendly))
{
$url_title_dash_added = false;
}
}
if ($i == $url_title_max_length)
{
break;
}
}
if ($url_title_dash_added)
{
return mb_substr($url_title_friendly, 0, -1);
}
else
{
return $url_title_friendly;
}
}
T-SQL实现,改编自dbo。UrlEncode:
CREATE FUNCTION dbo.Slug(@string varchar(1024))
RETURNS varchar(3072)
AS
BEGIN
DECLARE @count int, @c char(1), @i int, @slug varchar(3072)
SET @string = replace(lower(ltrim(rtrim(@string))),' ','-')
SET @count = Len(@string)
SET @i = 1
SET @slug = ''
WHILE (@i <= @count)
BEGIN
SET @c = substring(@string, @i, 1)
IF @c LIKE '[a-z0-9--]'
SET @slug = @slug + @c
SET @i = @i +1
END
RETURN @slug
END
我不熟悉Ruby on Rails,但以下是(未经测试的)PHP代码。如果您觉得有用的话,可以很快地将其转换为Ruby on Rails。
$sURL = "This is a title to convert to URL-format. It has 1 number in it!";
// To lower-case
$sURL = strtolower($sURL);
// Replace all non-word characters with spaces
$sURL = preg_replace("/\W+/", " ", $sURL);
// Remove trailing spaces (so we won't end with a separator)
$sURL = trim($sURL);
// Replace spaces with separators (hyphens)
$sURL = str_replace(" ", "-", $sURL);
echo $sURL;
// outputs: this-is-a-title-to-convert-to-url-format-it-has-1-number-in-it
我希望这能有所帮助。
我将代码移植到TypeScript中。它可以很容易地适应JavaScript。
我添加了一个.contains方法到字符串原型,如果你的目标是最新的浏览器或ES6,你可以使用.includes代替。
if (!String.prototype.contains) {
String.prototype.contains = function (check) {
return this.indexOf(check, 0) !== -1;
};
}
declare interface String {
contains(check: string): boolean;
}
export function MakeUrlFriendly(title: string) {
if (title == null || title == '')
return '';
const maxlen = 80;
let len = title.length;
let prevdash = false;
let result = '';
let c: string;
let cc: number;
let remapInternationalCharToAscii = function (c: string) {
let s = c.toLowerCase();
if ("àåáâäãåą".contains(s)) {
return "a";
}
else if ("èéêëę".contains(s)) {
return "e";
}
else if ("ìíîïı".contains(s)) {
return "i";
}
else if ("òóôõöøőð".contains(s)) {
return "o";
}
else if ("ùúûüŭů".contains(s)) {
return "u";
}
else if ("çćčĉ".contains(s)) {
return "c";
}
else if ("żźž".contains(s)) {
return "z";
}
else if ("śşšŝ".contains(s)) {
return "s";
}
else if ("ñń".contains(s)) {
return "n";
}
else if ("ýÿ".contains(s)) {
return "y";
}
else if ("ğĝ".contains(s)) {
return "g";
}
else if (c == 'ř') {
return "r";
}
else if (c == 'ł') {
return "l";
}
else if (c == 'đ') {
return "d";
}
else if (c == 'ß') {
return "ss";
}
else if (c == 'Þ') {
return "th";
}
else if (c == 'ĥ') {
return "h";
}
else if (c == 'ĵ') {
return "j";
}
else {
return "";
}
};
for (let i = 0; i < len; i++) {
c = title[i];
cc = c.charCodeAt(0);
if ((cc >= 97 /* a */ && cc <= 122 /* z */) || (cc >= 48 /* 0 */ && cc <= 57 /* 9 */)) {
result += c;
prevdash = false;
}
else if ((cc >= 65 && cc <= 90 /* A - Z */)) {
result += c.toLowerCase();
prevdash = false;
}
else if (c == ' ' || c == ',' || c == '.' || c == '/' || c == '\\' || c == '-' || c == '_' || c == '=') {
if (!prevdash && result.length > 0) {
result += '-';
prevdash = true;
}
}
else if (cc >= 128) {
let prevlen = result.length;
result += remapInternationalCharToAscii(c);
if (prevlen != result.length) prevdash = false;
}
if (i == maxlen) break;
}
if (prevdash)
return result.substring(0, result.length - 1);
else
return result;
}