是否有一个简单的方法将字符串转换为标题大小写(title case)?例如,john smith 变成 John Smith。我不是在寻找像 John Resig 的解决方案那样复杂的东西,只是(希望)一两行代码就能搞定。
当前回答
我已经为土耳其语测试了这个解决方案,它也适用于特殊字符。
/**
 * Converts a string to title case using locale-aware case mapping.
 *
 * The whole string is lowercased first (the author's data was all
 * uppercase), then the first character of every whitespace-delimited word is
 * uppercased. The Turkish letters are listed explicitly in the pattern
 * because `\w` only matches ASCII word characters.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function toTitleCase(str) {
  return str.toLocaleLowerCase().replace(
    /(^|Ü|ü|Ş|ş|Ç|ç|İ|ı|Ö|ö|\w)\S*/g,
    (txt) => txt.charAt(0).toLocaleUpperCase() + txt.substring(1)
  );
}

// NOTE(review): the sample strings were partly reconstructed from a garbled
// paste - confirm against the original answer.
console.log(toTitleCase('İSMAİL HAKKI'));
console.log(toTitleCase('ŞAHMARAN BİNBİR GECE MASALLARI'));
console.log(toTitleCase('TEKNOLOJİ ÜRÜNÜ'));
我在开头添加了“toLocaleLowerCase”,因为我所有的数据都是大写的。如果你不需要它,你可以丢弃它。
使用区域操作对于非英语语言很重要。
其他回答
基准
TL;DR
这个基准测试的赢家是简单的for循环:
/**
 * Title-cases a string one UTF-16 code unit at a time.
 *
 * The character following a space (or at the very start) is uppercased;
 * every other character is lowercased. Spaces are copied through unchanged.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function titleize(str) {
  const pieces = [];
  let atWordStart = true;
  for (let pos = 0; pos < str.length; pos += 1) {
    const ch = str[pos];
    // Note that you can also check for all kinds of spaces with
    // ch.match(/\s/)
    if (ch === " ") {
      pieces.push(ch);
      atWordStart = true;
    } else {
      pieces.push(atWordStart ? ch.toUpperCase() : ch.toLowerCase());
      atWordStart = false;
    }
  }
  return pieces.join("");
}
// NOTE: you could beat that using charcode and string builder I guess.
细节
我选取了最流行和最独特的答案,并以此为基准。
下面是我MacBook pro上的结果:
为了完整起见,这里是所使用的函数:
// Mixed-case sample sentence for the benchmark (presumably the input handed
// to each function below). Declared explicitly: assigning to an undeclared
// identifier throws a ReferenceError in strict mode and in ES modules.
const str = "the QUICK BrOWn Fox jUMPS oVeR the LAzy doG";
/**
 * Title-cases a string with a single regular-expression replace.
 *
 * Each match starts at a word character and extends over the following
 * non-whitespace characters; its first character is uppercased and the rest
 * lowercased.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function regex(str) {
  const capitalizeWord = (word) => {
    const head = word.charAt(0).toUpperCase();
    const tail = word.slice(1).toLowerCase();
    return head + tail;
  };
  return str.replace(/\w\S*/g, capitalizeWord);
}
/**
 * Title-cases a string by splitting on single spaces, capitalizing each
 * piece and joining the pieces back together.
 *
 * Empty pieces (produced by an empty string or by leading, trailing or
 * repeated spaces) are passed through unchanged: `charAt(0)` yields "" for
 * them, whereas indexing with `[0]` yields `undefined` and would throw.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text, with the original spacing kept.
 */
function split(str) {
  return str
    .split(' ')
    .map((w) => w.charAt(0).toUpperCase() + w.slice(1).toLowerCase())
    .join(' ');
}
/**
 * Title-cases a string with a few editorial rules applied on top.
 *
 * 1. Every word is capitalized (first character upper, rest lower); a hyphen
 *    starts a new word.
 * 2. Minor words (articles, conjunctions, short prepositions) are lowercased
 *    again when they have whitespace on both sides, i.e. when they are
 *    neither the first nor the last word.
 * 3. Known initialisms ("Id", "Tv") are uppercased.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function complete(str) {
  // Certain minor words should be left lowercase unless
  // they are the first or last words in the string
  const lowers = ['A', 'An', 'The', 'And', 'But', 'Or', 'For', 'Nor', 'As', 'At',
    'By', 'From', 'In', 'Into', 'Near', 'Of', 'On', 'Onto', 'To', 'With'];
  // Certain words such as initialisms or acronyms should be left uppercase
  const uppers = ['Id', 'Tv'];

  let result = str.replace(
    /([^\W_]+[^\s-]*) */g,
    (txt) => txt.charAt(0).toUpperCase() + txt.slice(1).toLowerCase()
  );

  // The trailing whitespace is asserted with a lookahead instead of being
  // consumed, so the same space can also serve as the leading whitespace of
  // the next minor word ("a a a b" -> "A a a B").
  const lowersPattern = new RegExp(`\\s(?:${lowers.join('|')})(?=\\s)`, 'g');
  result = result.replace(lowersPattern, (txt) => txt.toLowerCase());

  const uppersPattern = new RegExp(`\\b(?:${uppers.join('|')})\\b`, 'g');
  result = result.replace(uppersPattern, (txt) => txt.toUpperCase());

  return result;
}
/**
 * Uppercases every non-whitespace character that sits directly after a word
 * boundary, leaving all other characters untouched.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The text with boundary characters uppercased.
 */
function firstLetterOnly(str) {
  const toUpper = (match) => match.toUpperCase();
  return str.replace(/\b(\S)/g, toUpper);
}
/**
 * Title-cases a string with a plain index loop: the character after a space
 * (or at the start) is uppercased and all other characters are lowercased.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
function forLoop(str) {
  const total = str.length;
  let out = "";
  let capitalizeNext = true;
  let idx = 0;
  while (idx < total) {
    const ch = str[idx];
    idx += 1;
    if (ch !== " ") {
      out += capitalizeNext ? ch.toUpperCase() : ch.toLowerCase();
      capitalizeNext = false;
    } else {
      out += " ";
      capitalizeNext = true;
    }
  }
  return out;
}
请注意,我故意没有改变原型,因为我认为这是一个非常糟糕的做法,我认为我们不应该在我们的回答中推广这种做法。这只适用于小型代码库,如果只有你一个人在使用它。
如果你想添加任何其他方法来做这个基准测试,请评论一个链接到答案!
EDIT 2022 Mac M1:在我的新电脑上,使用最新的chrome浏览器,拆分胜出。如果您真的关心特定机器上的性能,您应该自己运行基准测试
// Uppercase the first character of every run of word characters and keep the
// remainder of the run exactly as written.
var toMatch = "john w. smith";
var result = toMatch.replace(
  /(\w)(\w*)/g,
  (_match, first, rest) => first.toUpperCase() + (rest ?? "")
);
似乎有用… 用上面的测试,“快棕色的狐狸?/跳过/越过了……“C:/程序文件/某些供应商/他们的第二个应用程序/a file1.txt”。
如果你想要 2nd 而不是 2Nd,可以把正则改为 /([a-z])(\w*)/g。
第一种形式可以简化为:
/**
 * Uppercases every lowercase ASCII letter that directly follows a word
 * boundary; all other characters are left as they are.
 *
 * @param {string} toTransform - Text to convert.
 * @returns {string} The converted text.
 */
function toTitleCase(toTransform) {
  const capitalize = (_match, letter) => letter.toUpperCase();
  return toTransform.replace(/\b([a-z])/g, capitalize);
}
这个解决方案将标点符号考虑到新句子中,处理引用,将小词转换为小写,忽略首字母缩写或全大写单词。
// Lowercase "minor" words that are left uncapitalised unless they start a
// sentence. Matching is exact, so a word with attached punctuation or any
// uppercase letter is not treated as a stop word.
var stopWordsArray = ["a", "all", "am", "an", "and", "any", "are", "as", "at", "be", "but", "by", "can", "can't", "did", "didn't", "do", "does", "doesn't", "don't", "else", "for", "get", "gets", "go", "got", "had", "has", "he", "he's", "her", "here", "hers", "hi", "him", "his", "how", "i'd", "i'll", "i'm", "i've", "if", "in", "is", "isn't", "it", "it's", "its", "let", "let's", "may", "me", "my", "no", "of", "off", "on", "our", "ours", "she", "so", "than", "that", "that's", "thats", "the", "their", "theirs", "them", "then", "there", "there's", "these", "they", "they'd", "they'll", "they're", "they've", "this", "those", "to", "too", "try", "until", "us", "want", "wants", "was", "wasn't", "we", "we'd", "we'll", "we're", "we've", "well", "went", "were", "weren't", "what", "what's", "when", "where", "which", "who", "who's", "whose", "why", "will", "with", "won't", "would", "yes", "yet", "you", "you'd", "you'll", "you're", "you've", "your"];

/**
 * Title-cases the string: only significant words are transformed.
 *
 * - Words are split on runs of whitespace and re-joined with single spaces.
 * - A word containing an uppercase letter after its first character
 *   (acronym, all caps, camelCase) is left untouched.
 * - A stop word is left lowercase unless it begins a new sentence.
 * - A word ending in `.`, `!`, `?` or `:` (optionally followed by a quote)
 *   marks the next word as the start of a new sentence.
 * - Tokens without any word character (e.g. "-") are returned unchanged.
 *
 * @returns {string} The title-cased text.
 */
String.prototype.toTitleCase = function() {
  let newSentence = true;
  return this.split(/\s+/).map((word) => {
    // Leading/trailing whitespace yields empty tokens; keep them empty.
    if (word === "") { return ""; }
    // Get the pos of the first alpha char (word might start with " or ')
    const firstAlphaCharPos = word.search(/\w/);
    let canCapitalise = true;
    if (word.search(/[A-Z]/) > 0) {
      // Uppercase char that is not the first char (might be acronym or all caps)
      canCapitalise = false;
    } else if (!newSentence && stopWordsArray.includes(word)) {
      // Is a stop word and not a new sentence
      canCapitalise = false;
    }
    // Is this the last word in a sentence?
    newSentence = word.search(/[\.!\?:]['"]?$/) > 0;
    // Nothing to capitalise when the token has no word character at all.
    if (!canCapitalise || firstAlphaCharPos === -1) { return word; }
    return word.slice(0, firstAlphaCharPos)
      + word[firstAlphaCharPos].toUpperCase()
      + word.slice(firstAlphaCharPos + 1);
  }).join(' ');
};
// Pass a string using dot notation:
alert("A critical examination of Plato's view of the human nature".toTitleCase());
var str = "Ten years on: a study into the effectiveness of NCEA in New Zealand schools";
alert(str.toTitleCase());
str = "\"Where to from here?\" the effectivness of eLearning in childhood education";
alert(str.toTitleCase());
/* Result:
A Critical Examination of Plato's View of the Human Nature
Ten Years On: A Study Into the Effectiveness of NCEA in New Zealand Schools
"Where to From Here?" The Effectivness of eLearning in Childhood Education */
只是又一个混合版本。它还会检查 string.length 是否为 0:
/**
 * Uppercases the first character of every space-separated word and leaves
 * the remaining characters as they are. An empty string yields "".
 *
 * @returns {string} The converted text, with the original spacing kept.
 */
String.prototype.toTitleCase = function() {
  if (this.length === 0) {
    return "";
  }
  return this.split(" ")
    .map(function(word) {
      return word.charAt(0).toUpperCase() + word.slice(1);
    })
    .join(" ");
};
如果你需要一个语法正确的答案:
这个答案考虑了介词,如“of”,“from”,… 输出将生成您希望在论文中看到的编辑风格的标题。
toTitleCase函数
考虑此处列出的语法规则的函数。 该函数还合并空格和删除特殊字符(根据需要修改regex)
/**
 * Produces an editorial-style title.
 *
 * Every character other than ASCII letters, digits, `&`, `/` and `\` is
 * replaced by a space, whitespace runs are collapsed, and the text is
 * lowercased and trimmed. The first and last words are always capitalized;
 * any other word is capitalized unless it is an article, conjunction or
 * preposition. Strings of one or two words are fully covered by the
 * first/last rule.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The title-cased text.
 */
const toTitleCase = (str) => {
  const minorWords = new Set([
    // articles
    'a', 'an', 'the',
    // conjunctions
    'for', 'and', 'nor', 'but', 'or', 'yet', 'so',
    // prepositions
    'with', 'at', 'from', 'into', 'upon', 'of', 'to', 'in', 'for',
    'on', 'by', 'like', 'over', 'plus', 'but', 'up', 'down', 'off', 'near',
  ]);

  // The list of special characters can be tweaked here
  const cleaned = str
    .replace(/[^0-9a-z&/\\]/gi, ' ')
    .replace(/(\s\s+)/gi, ' ')
    .toLowerCase()
    .trim();

  const tokens = cleaned.split(' ');
  const lastIndex = tokens.length - 1;

  return tokens
    .map((token, index) => {
      const isEdgeWord = index === 0 || index === lastIndex;
      if (!isEdgeWord && minorWords.has(token)) {
        return token;
      }
      return token.charAt(0).toUpperCase() + token.substr(1);
    })
    .join(' ');
};
确保正确性的单元测试
import { expect } from 'chai';
import { toTitleCase } from '../../src/lib/stringHelper';
// Specification for toTitleCase: each test feeds a list of
// [input, expected] pairs through the function and compares the results.
describe('toTitleCase', () => {
  // Asserts that toTitleCase maps every input to its expected title.
  const expectAll = (cases) => {
    for (const [input, expected] of cases) {
      expect(toTitleCase(input)).to.equal(expected);
    }
  };

  it('Capitalizes first letter of each word irrespective of articles, conjunctions or prepositions if string is no greater than two words long', () => {
    expectAll([
      ['the dog', 'The Dog'], // Capitalize articles when only two words long
      ['for all', 'For All'], // Capitalize conjunctions when only two words long
      ['with cats', 'With Cats'], // Capitalize prepositions when only two words long
    ]);
  });

  it('Always capitalize first and last words in a string irrespective of articles, conjunctions or prepositions', () => {
    expectAll([
      ['the beautiful dog', 'The Beautiful Dog'],
      ['for all the deadly ninjas, be it so', 'For All the Deadly Ninjas Be It So'],
      ['with cats and dogs we are near', 'With Cats and Dogs We Are Near'],
    ]);
  });

  it('Replace special characters with space', () => {
    expectAll([
      ['[wolves & lions]: be careful', 'Wolves & Lions Be Careful'],
      ['wolves & lions, be careful', 'Wolves & Lions Be Careful'],
    ]);
  });

  it('Trim whitespace at beginning and end', () => {
    expectAll([
      [' mario & Luigi superstar saga ', 'Mario & Luigi Superstar Saga'],
    ]);
  });

  it('articles, conjunctions and prepositions should not be capitalized in strings of 3+ words', () => {
    expectAll([
      ['The wolf and the lion: a tale of two like animals', 'The Wolf and the Lion a Tale of Two like Animals'],
      ['the three Musketeers And plus ', 'The Three Musketeers and Plus'],
    ]);
  });
});
请注意,我从提供的字符串中删除了相当多的特殊字符。您将需要调整正则表达式以满足项目的需求。