我想把一个非常大的字符串(比如10,000个字符)分割成n大小的块。
就性能而言,最好的方法是什么?
例如: 将"1234567890"按每块2个字符分割,将得到["12","34","56","78","90"]。
使用String.prototype.match可以实现这样的事情吗?如果可以,从性能来看,这是最好的方式吗?
我想把一个非常大的字符串(比如10,000个字符)分割成n大小的块。
就性能而言,最好的方法是什么?
例如: 将"1234567890"按每块2个字符分割,将得到["12","34","56","78","90"]。
使用String.prototype.match可以实现这样的事情吗?如果可以,从性能来看,这是最好的方式吗?
当前回答
/**
 * Splits a string into consecutive chunks of at most `chunkSize` UTF-16 code
 * units. Only the last chunk may be shorter than `chunkSize`.
 *
 * Line terminators are treated like any other character, and an empty string
 * yields an empty array (a regex-based `.{1,n}` match would drop newlines and
 * return `null` for empty input).
 *
 * @param {string} str - The string to split.
 * @param {number|string} chunkSize - Maximum chunk length; a positive integer
 *   or a string that converts to one.
 * @returns {string[]} The chunks, in order.
 * @throws {RangeError} If `chunkSize` is not a positive integer.
 */
const getChunksFromString = (str, chunkSize) => {
  const size = Number(chunkSize);
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
  }

  const chunks = [];
  // slice() clamps the end index, so the final chunk is simply shorter.
  for (let start = 0; start < str.length; start += size) {
    chunks.push(str.slice(start, start + size));
  }
  return chunks;
};
根据需要调用它
// Example: split "Hello world" into pieces of at most 3 characters and print them.
console.log(getChunksFromString("Hello world", 3)) // ["Hel", "lo ", "wor", "ld"]
其他回答
以原型函数的形式:
/**
 * Splits this string into consecutive chunks of at most `size` UTF-16 code
 * units.
 *
 * The chunk size is taken from the single argument when exactly one argument
 * is passed; with any other argument count the size defaults to 1. Numeric
 * strings (optionally padded with whitespace) are accepted.
 *
 * Line terminators are kept inside the chunks and an empty string yields an
 * empty array.
 *
 * @this {string}
 * @param {...(number|string)} sizeArgs - Optional chunk size (positive integer).
 * @returns {string[]} The chunks, in order.
 * @throws {RangeError} If the supplied size is not a positive integer.
 */
String.prototype.lsplit = function (...sizeArgs) {
  const size = sizeArgs.length === 1 ? Number(String(sizeArgs[0]).trim()) : 1;
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`chunk size must be a positive integer, got ${String(sizeArgs[0])}`);
  }

  // In sloppy mode `this` is a String wrapper object; normalise to a primitive.
  const text = String(this);
  const chunks = [];
  for (let start = 0; start < text.length; start += size) {
    chunks.push(text.slice(start, start + size));
  }
  return chunks;
};
它将大字符串拆分为给定单词的小字符串。
/**
 * Groups the whitespace-separated words of `str` into chunks without ever
 * splitting a word.
 *
 * Words are appended (joined by a single space) to the current chunk while
 * that chunk's character length is still below `words`; once the threshold is
 * reached the chunk is emitted and the next word starts a new chunk. Note
 * that, despite its name, `words` is compared against the chunk's character
 * length, not against a word count.
 *
 * Every word ends up in the result: the trailing chunk is always emitted, and
 * chunks carry no leading or repeated whitespace. No jQuery is required.
 *
 * @param {string} str - Text to split.
 * @param {number} words - Character-length threshold at which a chunk is closed.
 * @returns {string[]} The chunks, in order; empty when `str` has no words.
 */
function chunkSubstr(str, words) {
  const chunks = [];
  let current = "";

  for (const word of String(str).split(/\s+/)) {
    if (word === "") {
      // Produced by leading/trailing whitespace; carries no content.
      continue;
    }
    if (current === "") {
      current = word;
    } else if (current.length < words) {
      current = `${current} ${word}`;
    } else {
      chunks.push(current);
      current = word;
    }
  }

  // Flush whatever is still pending so the tail of the input is not lost.
  if (current !== "") {
    chunks.push(current);
  }
  return chunks;
}
这是我使用的代码,它使用String.prototype.slice。
是的,这是一个相当长的答案,因为它试图尽可能严格地遵循当前标准,当然也包含了相当数量的JSDoc注释。然而,经过压缩(minify)后,代码只有828字节;再经gzip压缩传输时,只有497字节。
它向String.prototype添加的唯一一个方法(如果Object.defineProperty可用,则通过它添加)是:
toChunks
包含了许多测试来检查功能。
担心代码的长度会影响性能?不用担心,http://jsperf.com/chunk-string/3
许多额外的代码是为了确保代码在多个javascript环境中响应相同。
/*jslint maxlen:80, browser:true, devel:true */

/*
 * Properties used by toChunks.
 */
/*property
    MAX_SAFE_INTEGER, abs, ceil, configurable, defineProperty, enumerable,
    floor, length, max, min, pow, prototype, slice, toChunks, value, writable
*/

/*
 * Properties used in the testing of toChunks implementation.
 */
/*property
    appendChild, createTextNode, floor, fromCharCode, getElementById, length,
    log, pow, push, random, toChunks
*/

(function () {
  'use strict';

  var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || Math.pow(2, 53) - 1;

  /**
   * Defines a new property directly on an object, or modifies an existing
   * property on an object, and returns the object. Falls back to plain
   * assignment of descriptor.value when Object.defineProperty is missing.
   *
   * @private
   * @function
   * @param {Object} object
   * @param {string} property
   * @param {Object} descriptor
   * @return {Object}
   * @see https://goo.gl/CZnEqg
   */
  function $defineProperty(object, property, descriptor) {
    if (Object.defineProperty) {
      Object.defineProperty(object, property, descriptor);
    } else {
      object[property] = descriptor.value;
    }

    return object;
  }

  /**
   * Returns true if the operands are strictly equal with no type conversion.
   *
   * @private
   * @function
   * @param {*} a
   * @param {*} b
   * @return {boolean}
   * @see http://www.ecma-international.org/ecma-262/5.1/#sec-11.9.4
   */
  function $strictEqual(a, b) {
    return a === b;
  }

  /**
   * Returns true if the operand inputArg is undefined.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @return {boolean}
   */
  function $isUndefined(inputArg) {
    return $strictEqual(typeof inputArg, 'undefined');
  }

  /**
   * The abstract operation throws an error if its argument is a value that
   * cannot be converted to an Object, otherwise returns the argument.
   *
   * @private
   * @function
   * @param {*} inputArg The object to be tested.
   * @throws {TypeError} If inputArg is null or undefined.
   * @return {*} The inputArg if coercible.
   * @see https://goo.gl/5GcmVq
   */
  function $requireObjectCoercible(inputArg) {
    var errStr;

    if (inputArg === null || $isUndefined(inputArg)) {
      errStr = 'Cannot convert argument to object: ' + inputArg;
      throw new TypeError(errStr);
    }

    return inputArg;
  }

  /**
   * The abstract operation converts its argument to a value of type string
   *
   * @private
   * @function
   * @param {*} inputArg
   * @throws {TypeError} If inputArg is a symbol.
   * @return {string}
   * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tostring
   */
  function $toString(inputArg) {
    var type,
      val;

    if (inputArg === null) {
      val = 'null';
    } else {
      type = typeof inputArg;
      if (type === 'string') {
        val = inputArg;
      } else if (type === 'undefined') {
        val = type;
      } else {
        if (type === 'symbol') {
          throw new TypeError('Cannot convert symbol to string');
        }

        val = String(inputArg);
      }
    }

    return val;
  }

  /**
   * Returns a string only if the arguments is coercible otherwise throws an
   * error.
   *
   * @private
   * @function
   * @param {*} inputArg
   * @throws {TypeError} If inputArg is null or undefined.
   * @return {string}
   */
  function $onlyCoercibleToString(inputArg) {
    return $toString($requireObjectCoercible(inputArg));
  }

  /**
   * The function evaluates the passed value and converts it to an integer.
   *
   * @private
   * @function
   * @param {*} inputArg The object to be converted to an integer.
   * @return {number} If the target value is NaN, null or undefined, 0 is
   *                  returned. If the target value is false, 0 is returned
   *                  and if true, 1 is returned.
   * @see http://www.ecma-international.org/ecma-262/5.1/#sec-9.4
   */
  function $toInteger(inputArg) {
    var number = +inputArg,
      val = 0;

    // NaN is the only value that is not strictly equal to itself.
    if ($strictEqual(number, number)) {
      if (!number || number === Infinity || number === -Infinity) {
        val = number;
      } else {
        val = (number > 0 || -1) * Math.floor(Math.abs(number));
      }
    }

    return val;
  }

  /**
   * The abstract operation ToLength converts its argument to an integer
   * suitable for use as the length of an array-like object.
   *
   * @private
   * @function
   * @param {*} inputArg The object to be converted to a length.
   * @return {number} If len <= +0 then +0 else if len is +INFINITY then
   *                  2^53-1 else min(len, 2^53-1).
   * @see https://people.mozilla.org/~jorendorff/es6-draft.html#sec-tolength
   */
  function $toLength(inputArg) {
    return Math.min(Math.max($toInteger(inputArg), 0), MAX_SAFE_INTEGER);
  }

  if (!String.prototype.toChunks) {
    /**
     * This method chunks a string into an array of strings of a specified
     * chunk size.
     *
     * A chunk size below 1 (after integer conversion) or an empty string
     * yields an empty array. A chunk size larger than the string, including
     * Infinity, yields a single chunk holding the whole string.
     *
     * @function
     * @this {string} The string to be chunked.
     * @param {Number} chunkSize The size of the chunks that the string will
     *                           be chunked into.
     * @returns {Array} Returns an array of the chunked string.
     */
    $defineProperty(String.prototype, 'toChunks', {
      enumerable: false,
      configurable: true,
      writable: true,
      value: function (chunkSize) {
        var str = $onlyCoercibleToString(this),
          chunkLength = $toInteger(chunkSize),
          length = $toLength(str.length),
          chunked = [],
          numChunks,
          index,
          start,
          end;

        if (chunkLength < 1 || length === 0) {
          return chunked;
        }

        // Clamp to the string length: without this an infinite chunk size
        // makes length / chunkLength equal 0 and the string would be lost.
        chunkLength = Math.min(chunkLength, length);
        numChunks = Math.ceil(length / chunkLength);
        index = 0;
        start = 0;
        end = chunkLength;
        // Pre-size the result so the loop only assigns existing slots.
        chunked.length = numChunks;
        while (index < numChunks) {
          chunked[index] = str.slice(start, end);
          start = end;
          end += chunkLength;
          index += 1;
        }

        return chunked;
      }
    });
  }
}());

/*
 * Some tests
 *
 * In a browser the results are appended to the element with id "out",
 * e.g. <pre id="out"></pre>. When no such element (or no document) exists
 * the results are written with console.log instead.
 */
(function () {
  'use strict';

  var hasDocument = typeof document !== 'undefined',
    pre = hasDocument ? document.getElementById('out') : null,
    chunkSizes = [],
    maxChunkSize = 512,
    testString = '',
    maxTestString = 100000,
    chunkSize = 0,
    index = 1;

  function log(result) {
    if (pre) {
      pre.appendChild(document.createTextNode(result + '\n'));
    } else if (typeof console !== 'undefined') {
      console.log(String(result));
    }
  }

  // Chunk sizes under test: powers of two from 2 up to maxChunkSize.
  while (chunkSize < maxChunkSize) {
    chunkSize = Math.pow(2, index);
    chunkSizes.push(chunkSize);
    index += 1;
  }

  // Random test string of printable ASCII characters (codes 32..126).
  index = 0;
  while (index < maxTestString) {
    testString += String.fromCharCode(Math.floor(Math.random() * 95) + 32);
    index += 1;
  }

  function test() {
    var strLength = testString.length,
      czLength = chunkSizes.length,
      czIndex = 0,
      czValue,
      result,
      numChunks,
      pass;

    while (czIndex < czLength) {
      czValue = chunkSizes[czIndex];
      numChunks = Math.ceil(strLength / czValue);
      result = testString.toChunks(czValue);
      czIndex += 1;
      log('chunksize: ' + czValue);
      log(' Number of chunks:');
      log(' Calculated: ' + numChunks);
      log(' Actual:' + result.length);
      pass = result.length === numChunks;
      log(' First chunk size: ' + result[0].length);
      pass = pass && result[0].length === czValue;
      log(' Passed: ' + pass);
      log('');
    }
  }

  test();
  log('');
  log('Simple test result');
  log('abcdefghijklmnopqrstuvwxyz'.toChunks(3));
}());
你可以这样做:
"1234567890".match(/.{1,2}/g);
// Results in:
["12", "34", "56", "78", "90"]
如果字符串的大小不是chunk-size的倍数,该方法仍然有效:
"123456789".match(/.{1,2}/g);
// Results in:
["12", "34", "56", "78", "9"]
一般来说,对于任何字符串,如果你想从中提取长度最多为n的子字符串,可以这样做:
str.match(/.{1,n}/g); // Replace n with the size of the substring
如果你的字符串可以包含换行符或回车,你会这样做:
str.match(/(.|[\r\n]){1,n}/g); // Replace n with the size of the substring
至于性能,我用了大约10k个字符,在Chrome上花了一秒钟多一点的时间。YMMV。
这也可以用在可重用函数中:
/**
 * Splits `str` into consecutive chunks of at most `length` UTF-16 code units.
 * Only the last chunk may be shorter.
 *
 * Carriage returns and newlines count as ordinary characters, and an empty
 * string yields an empty array (the regex form `.{1,n}` skips line
 * terminators and returns `null` when nothing matches).
 *
 * @param {string} str - The string to split.
 * @param {number|string} length - Maximum chunk length; a positive integer or
 *   a string that converts to one.
 * @returns {string[]} The chunks, in order.
 * @throws {RangeError} If `length` is not a positive integer.
 */
function chunkString(str, length) {
  const size = Number(length);
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`length must be a positive integer, got ${length}`);
  }

  const chunks = [];
  for (let start = 0; start < str.length; start += size) {
    chunks.push(str.slice(start, start + size));
  }
  return chunks;
}
我对上述解决方案的不满之处在于:不管在句子中的位置如何,它都会把字符串切分成固定大小的块。
我认为下面的方法比较好;虽然它需要一些性能调整:
static chunkString(str, length, size,delimiter='\n' ) {
const result = [];
for (let i = 0; i < str.length; i++) {
const lastIndex = _.lastIndexOf(str, delimiter,size + i);
result.push(str.substr(i, lastIndex - i));
i = lastIndex;
}
return result;
}