假设我有一个字符串:
string str = "1111222233334444";
我如何把这个字符串分成一定大小的块?
例如,将它分解为4的大小将返回字符串:
"1111"
"2222"
"3333"
"4444"
假设我有一个字符串:
string str = "1111222233334444";
我如何把这个字符串分成一定大小的块?
例如,将它分解为4的大小将返回字符串:
"1111"
"2222"
"3333"
"4444"
当前回答
我不记得是谁给我的了,但它很好用。我快速测试了几种将枚举类型分成组的方法。用法是这样的…
List<string> Divided = Source3.Chunk(24).Select(Piece => string.Concat<char>(Piece)).ToList();
扩展代码看起来像这样…
#region Chunk Logic
private class ChunkedEnumerable<T> : IEnumerable<T>
{
class ChildEnumerator : IEnumerator<T>
{
ChunkedEnumerable<T> parent;
int position;
bool done = false;
T current;
public ChildEnumerator(ChunkedEnumerable<T> parent)
{
this.parent = parent;
position = -1;
parent.wrapper.AddRef();
}
public T Current
{
get
{
if (position == -1 || done)
{
throw new InvalidOperationException();
}
return current;
}
}
public void Dispose()
{
if (!done)
{
done = true;
parent.wrapper.RemoveRef();
}
}
object System.Collections.IEnumerator.Current
{
get { return Current; }
}
public bool MoveNext()
{
position++;
if (position + 1 > parent.chunkSize)
{
done = true;
}
if (!done)
{
done = !parent.wrapper.Get(position + parent.start, out current);
}
return !done;
}
public void Reset()
{
// per http://msdn.microsoft.com/en-us/library/system.collections.ienumerator.reset.aspx
throw new NotSupportedException();
}
}
EnumeratorWrapper<T> wrapper;
int chunkSize;
int start;
public ChunkedEnumerable(EnumeratorWrapper<T> wrapper, int chunkSize, int start)
{
this.wrapper = wrapper;
this.chunkSize = chunkSize;
this.start = start;
}
public IEnumerator<T> GetEnumerator()
{
return new ChildEnumerator(this);
}
System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
{
return GetEnumerator();
}
}
private class EnumeratorWrapper<T>
{
public EnumeratorWrapper(IEnumerable<T> source)
{
SourceEumerable = source;
}
IEnumerable<T> SourceEumerable { get; set; }
Enumeration currentEnumeration;
class Enumeration
{
public IEnumerator<T> Source { get; set; }
public int Position { get; set; }
public bool AtEnd { get; set; }
}
public bool Get(int pos, out T item)
{
if (currentEnumeration != null && currentEnumeration.Position > pos)
{
currentEnumeration.Source.Dispose();
currentEnumeration = null;
}
if (currentEnumeration == null)
{
currentEnumeration = new Enumeration { Position = -1, Source = SourceEumerable.GetEnumerator(), AtEnd = false };
}
item = default(T);
if (currentEnumeration.AtEnd)
{
return false;
}
while (currentEnumeration.Position < pos)
{
currentEnumeration.AtEnd = !currentEnumeration.Source.MoveNext();
currentEnumeration.Position++;
if (currentEnumeration.AtEnd)
{
return false;
}
}
item = currentEnumeration.Source.Current;
return true;
}
int refs = 0;
// needed for dispose semantics
public void AddRef()
{
refs++;
}
public void RemoveRef()
{
refs--;
if (refs == 0 && currentEnumeration != null)
{
var copy = currentEnumeration;
currentEnumeration = null;
copy.Source.Dispose();
}
}
}
/// <summary>Speed Checked. Works Great!</summary>
public static IEnumerable<IEnumerable<T>> Chunk<T>(this IEnumerable<T> source, int chunksize)
{
if (chunksize < 1) throw new InvalidOperationException();
var wrapper = new EnumeratorWrapper<T>(source);
int currentPos = 0;
T ignore;
try
{
wrapper.AddRef();
while (wrapper.Get(currentPos, out ignore))
{
yield return new ChunkedEnumerable<T>(wrapper, chunksize, currentPos);
currentPos += chunksize;
}
}
finally
{
wrapper.RemoveRef();
}
}
#endregion
其他回答
使用正则表达式和Linq:
List<string> groups = (from Match m in Regex.Matches(str, @"\d{4}")
select m.Value).ToList();
我觉得这样更有可读性,但这只是个人观点。它也可以是一行代码:)。
在多芬和康他汀的答案组合中……
static IEnumerable<string> WholeChunks(string str, int chunkSize) {
for (int i = 0; i < str.Length; i += chunkSize)
yield return str.Substring(i, chunkSize);
}
这将适用于所有可以被分割成大量块的字符串,否则将抛出异常。
如果你想支持任意长度的字符串,你可以使用下面的代码:
static IEnumerable<string> ChunksUpto(string str, int maxChunkSize) {
for (int i = 0; i < str.Length; i += maxChunkSize)
yield return str.Substring(i, Math.Min(maxChunkSize, str.Length-i));
}
然而,OP明确表示他不需要这个;它有点长,很难读,稍微慢一点。本着KISS和YAGNI的精神,我选择第一个选项:它可能是最有效的实现,而且非常简短、可读,而且重要的是,它会对不符合规范的输入抛出异常。
六年后o_O
仅仅因为
public static IEnumerable<string> Split(this string str, int chunkSize, bool remainingInFront)
{
var count = (int) Math.Ceiling(str.Length/(double) chunkSize);
Func<int, int> start = index => remainingInFront ? str.Length - (count - index)*chunkSize : index*chunkSize;
Func<int, int> end = index => Math.Min(str.Length - Math.Max(start(index), 0), Math.Min(start(index) + chunkSize - Math.Max(start(index), 0), chunkSize));
return Enumerable.Range(0, count).Select(i => str.Substring(Math.Max(start(i), 0),end(i)));
}
or
private static Func<bool, int, int, int, int, int> start = (remainingInFront, length, count, index, size) =>
remainingInFront ? length - (count - index) * size : index * size;
private static Func<bool, int, int, int, int, int, int> end = (remainingInFront, length, count, index, size, start) =>
Math.Min(length - Math.Max(start, 0), Math.Min(start + size - Math.Max(start, 0), size));
public static IEnumerable<string> Split(this string str, int chunkSize, bool remainingInFront)
{
var count = (int)Math.Ceiling(str.Length / (double)chunkSize);
return Enumerable.Range(0, count).Select(i => str.Substring(
Math.Max(start(remainingInFront, str.Length, count, i, chunkSize), 0),
end(remainingInFront, str.Length, count, i, chunkSize, start(remainingInFront, str.Length, count, i, chunkSize))
));
}
AFAIK所有的边缘情况都处理好了。
Console.WriteLine(string.Join(" ", "abc".Split(2, false))); // ab c
Console.WriteLine(string.Join(" ", "abc".Split(2, true))); // a bc
Console.WriteLine(string.Join(" ", "a".Split(2, true))); // a
Console.WriteLine(string.Join(" ", "a".Split(2, false))); // a
这样写一行代码怎么样?
List<string> result = new List<string>(Regex.Split(target, @"(?<=\G.{4})", RegexOptions.Singleline));
对于这个正则表达式,最后一个块是否小于4个字符并不重要,因为它只查看它后面的字符。
我知道这不是最有效的解决方案,但我不得不把它扔出去。
static IEnumerable<string> Split(string str, int chunkSize)
{
IEnumerable<string> retVal = Enumerable.Range(0, str.Length / chunkSize)
.Select(i => str.Substring(i * chunkSize, chunkSize))
if (str.Length % chunkSize > 0)
retVal = retVal.Append(str.Substring(str.Length / chunkSize * chunkSize, str.Length % chunkSize));
return retVal;
}
它正确地处理不能被chunkSize整除的输入字符串长度。
请注意,可能需要额外的代码来优雅地处理边缘情况(null或空输入字符串,chunkSize == 0)。