这是另一种完全外部连接(full outer join)的实现。
由于对其他方案的简洁性和可读性不太满意,我最终写出了下面这个版本:
它并不追求速度(在 2020M CPU(2.4 GHz / 2 核)上连接 1000 × 1000 条记录大约需要 800 毫秒)。对我来说,它只是一个紧凑而随意的完全外部连接实现。
它的行为与 SQL 的 FULL OUTER JOIN 相同(保留重复项)。
欢呼;-)
using System;
using System.Collections.Generic;
using System.Linq;
namespace NS
{
/// <summary>
/// In-memory full outer join helper for lists.
/// </summary>
public static class DataReunion
{
    /// <summary>
    /// Performs a full outer join of two lists on keys computed by the supplied selectors.
    /// Duplicate keys are preserved: every left row is combined with every right row
    /// that has an equal key.
    /// </summary>
    /// <typeparam name="T1">Element type of the left list.</typeparam>
    /// <typeparam name="T2">Element type of the right list.</typeparam>
    /// <typeparam name="TKey">
    /// Join key type. Keys are compared with the default equality comparer and sorted with
    /// the default comparer, so the type is expected to be comparable.
    /// </typeparam>
    /// <param name="List1">Left-hand rows.</param>
    /// <param name="KeyFunc1">Extracts the join key from a left-hand row.</param>
    /// <param name="List2">Right-hand rows.</param>
    /// <param name="KeyFunc2">Extracts the join key from a right-hand row.</param>
    /// <returns>
    /// A new list containing, in this order: left rows without a match (paired with
    /// <c>default(T2)</c>), every matching left/right combination, and right rows without
    /// a match (paired with <c>default(T1)</c>). Inside each group rows follow key order.
    /// </returns>
    /// <remarks>
    /// A null key never matches anything, not even another null key, mirroring SQL
    /// semantics. Rows with a null key are therefore always reported as unmatched.
    /// </remarks>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    public static List<Tuple<T1, T2>> FullJoin<T1, T2, TKey>(List<T1> List1, Func<T1, TKey> KeyFunc1, List<T2> List2, Func<T2, TKey> KeyFunc2)
    {
        if (List1 == null) { throw new ArgumentNullException(nameof(List1)); }
        if (KeyFunc1 == null) { throw new ArgumentNullException(nameof(KeyFunc1)); }
        if (List2 == null) { throw new ArgumentNullException(nameof(List2)); }
        if (KeyFunc2 == null) { throw new ArgumentNullException(nameof(KeyFunc2)); }

        // Compute each key exactly once and sort by it; the sort is stable, so rows
        // sharing a key keep their input order.
        Tuple<TKey, T1>[] leftRows = List1.Select(item => Tuple.Create(KeyFunc1(item), item)).OrderBy(row => row.Item1).ToArray();
        Tuple<TKey, T2>[] rightRows = List2.Select(item => Tuple.Create(KeyFunc2(item), item)).OrderBy(row => row.Item1).ToArray();

        // Hash-based indexes replace the per-row linear scans. Null keys are left out
        // on purpose so that they can never be considered a match.
        ILookup<TKey, Tuple<TKey, T2>> rightByKey = rightRows.Where(row => row.Item1 != null).ToLookup(row => row.Item1);
        HashSet<TKey> leftKeys = new HashSet<TKey>(leftRows.Where(row => row.Item1 != null).Select(row => row.Item1));

        List<Tuple<T1, T2>> result = new List<Tuple<T1, T2>>();

        // 1. Left rows with no partner on the right.
        foreach (Tuple<TKey, T1> left in leftRows)
        {
            if (left.Item1 == null || !rightByKey.Contains(left.Item1))
            {
                result.Add(Tuple.Create<T1, T2>(left.Item2, default(T2)));
            }
        }

        // 2. Inner join: each left row combined with all right rows of the same key.
        foreach (Tuple<TKey, T1> left in leftRows)
        {
            if (left.Item1 == null)
            {
                continue;
            }

            // The lookup indexer yields an empty sequence for keys it does not hold.
            foreach (Tuple<TKey, T2> right in rightByKey[left.Item1])
            {
                result.Add(Tuple.Create<T1, T2>(left.Item2, right.Item2));
            }
        }

        // 3. Right rows with no partner on the left.
        foreach (Tuple<TKey, T2> right in rightRows)
        {
            if (right.Item1 == null || !leftKeys.Contains(right.Item1))
            {
                result.Add(Tuple.Create<T1, T2>(default(T1), right.Item2));
            }
        }

        return result;
    }
}
}
思路如下:
1. 根据提供的键函数为每个元素生成键
2. 处理仅存在于左侧的项
3. 处理内连接(两侧都存在的项)
4. 处理仅存在于右侧的项
下面是一个与之相关的简单测试:
在结束处放置断点,以手动验证它的行为是否符合预期
using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NS;
namespace Tests
{
/// <summary>
/// Checks <c>DataReunion.FullJoin</c> on a mix of pseudo-random and hand-picked rows,
/// joined on the composite key (Item1, Item2).
/// </summary>
[TestClass]
public class DataReunionTest
{
    /// <summary>Counts how many rows carry each key.</summary>
    private static Dictionary<Tuple<Int32, Int32>, Int32> CountByKey(
        List<Tuple<Int32, Int32, String>> rows,
        Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key)
    {
        var counts = new Dictionary<Tuple<Int32, Int32>, Int32>();
        foreach (var row in rows)
        {
            Tuple<Int32, Int32> rowKey = key(row);
            Int32 seen;
            counts.TryGetValue(rowKey, out seen); // leaves 0 when the key is new
            counts[rowKey] = seen + 1;
        }
        return counts;
    }

    /// <summary>
    /// Runs the join, prints the elapsed milliseconds, and verifies the result against
    /// full-outer-join invariants computed independently of the code under test.
    /// </summary>
    [TestMethod]
    public void Test()
    {
        List<Tuple<Int32, Int32, String>> A = new List<Tuple<Int32, Int32, String>>();
        List<Tuple<Int32, Int32, String>> B = new List<Tuple<Int32, Int32, String>>();
        // Fixed seed: a failing run can be reproduced exactly.
        Random rnd = new Random(12345);

        // Comment out the data block you do not want to run.

        // Data set covering a wide range of keys.
        for (int i = 0; i < 500; i += 1)
        {
            A.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "A"));
            B.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "B"));
        }

        // Essential cases: duplicates on both sides, left-only and right-only keys.
        A.Add(Tuple.Create(1, 2, "B11"));
        A.Add(Tuple.Create(1, 2, "B12"));
        A.Add(Tuple.Create(1, 3, "C11"));
        A.Add(Tuple.Create(1, 3, "C12"));
        A.Add(Tuple.Create(1, 3, "C13"));
        A.Add(Tuple.Create(1, 4, "D1"));
        B.Add(Tuple.Create(1, 1, "A21"));
        B.Add(Tuple.Create(1, 1, "A22"));
        B.Add(Tuple.Create(1, 1, "A23"));
        B.Add(Tuple.Create(1, 2, "B21"));
        B.Add(Tuple.Create(1, 2, "B22"));
        B.Add(Tuple.Create(1, 2, "B23"));
        B.Add(Tuple.Create(1, 3, "C2"));
        B.Add(Tuple.Create(1, 5, "E2"));

        Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key = (_) => Tuple.Create(_.Item1, _.Item2);

        var watch = System.Diagnostics.Stopwatch.StartNew();
        var res = DataReunion.FullJoin(A, key, B, key);
        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;

        // Kept so the whole result can be inspected from a debugger.
        String aser = JToken.FromObject(res).ToString(Formatting.Indented);
        Console.Write(elapsedMs);

        Dictionary<Tuple<Int32, Int32>, Int32> countsA = CountByKey(A, key);
        Dictionary<Tuple<Int32, Int32>, Int32> countsB = CountByKey(B, key);

        // A key present on both sides yields countA * countB rows; a key present on
        // one side only yields one row per occurrence.
        Int32 expectedRows = 0;
        foreach (var entry in countsA)
        {
            Int32 inB;
            expectedRows += countsB.TryGetValue(entry.Key, out inB) ? entry.Value * inB : entry.Value;
        }
        foreach (var entry in countsB)
        {
            if (!countsA.ContainsKey(entry.Key))
            {
                expectedRows += entry.Value;
            }
        }
        Assert.AreEqual(expectedRows, res.Count, "Unexpected number of joined rows.");

        foreach (var pair in res)
        {
            Assert.IsFalse(pair.Item1 == null && pair.Item2 == null, "A joined row must have at least one side.");

            if (pair.Item1 != null && pair.Item2 != null)
            {
                Assert.AreEqual(key(pair.Item1), key(pair.Item2), "Matched rows must share the same key.");
            }
            else if (pair.Item2 == null)
            {
                Assert.IsFalse(countsB.ContainsKey(key(pair.Item1)), "Left-only row has a partner on the right.");
            }
            else
            {
                Assert.IsFalse(countsA.ContainsKey(key(pair.Item2)), "Right-only row has a partner on the left.");
            }
        }
    }
}
}