我有一份人们的身份证和名字的名单,还有一份人们的身份证和姓氏的名单。有些人没有名字,有些人没有姓;我想在这两个列表上做一个完整的外部连接。

下面列出:

ID  FirstName
--  ---------
 1  John
 2  Sue

ID  LastName
--  --------
 1  Doe
 3  Smith

应该生产:

ID  FirstName  LastName
--  ---------  --------
 1  John       Doe
 2  Sue
 3             Smith

我已经发现了相当多的解决方案的“LINQ外部连接”,它们看起来都很相似,但似乎真的是离开外部连接。

到目前为止我的尝试是这样的:

private void OuterJoinTest()
{
    List<FirstName> firstNames = new List<FirstName>();
    firstNames.Add(new FirstName { ID = 1, Name = "John" });
    firstNames.Add(new FirstName { ID = 2, Name = "Sue" });

    List<LastName> lastNames = new List<LastName>();
    lastNames.Add(new LastName { ID = 1, Name = "Doe" });
    lastNames.Add(new LastName { ID = 3, Name = "Smith" });

    var outerJoin = from first in firstNames
        join last in lastNames
        on first.ID equals last.ID
        into temp
        from last in temp.DefaultIfEmpty()
        select new
        {
            id = first != null ? first.ID : last.ID,
            firstname = first != null ? first.Name : string.Empty,
            surname = last != null ? last.Name : string.Empty
        };
    }
}

public class FirstName
{
    public int ID;
    
    public string Name;
}
    
public class LastName
{
    public int ID;
    
    public string Name;
}

但结果是:

ID  FirstName  LastName
--  ---------  --------
 1  John       Doe
 2  Sue

我做错了什么?


当前回答

我认为其中大部分都存在问题,包括公认的答案,因为它们与Linq相比IQueryable工作不太好,要么是由于做了太多的服务器往返和太多的数据返回,要么是做了太多的客户端执行。

对于IEnumerable,我不喜欢Sehe的答案或类似的答案,因为它占用了过多的内存(一个简单的10000000两个列表测试在我的32GB机器上运行Linqpad内存)。

另外,其他大多数方法实际上并没有实现正确的Full Outer Join,因为它们使用的是带有右连接的Union,而不是带有右反半连接的Concat,这不仅消除了结果中重复的内部连接行,而且消除了最初存在于左或右数据中的任何正确的副本。

所以这里是我的扩展,处理所有这些问题,生成SQL以及直接实现LINQ到SQL的连接,在服务器上执行,比其他枚举对象更快,内存更少:

public static class Ext {
    public static IEnumerable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return from left in leftItems
               join right in rightItems on leftKeySelector(left) equals rightKeySelector(right) into temp
               from right in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    public static IEnumerable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return from right in rightItems
               join left in leftItems on rightKeySelector(right) equals leftKeySelector(left) into temp
               from left in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    public static IEnumerable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    public static IEnumerable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        var hashLK = new HashSet<TKey>(from l in leftItems select leftKeySelector(l));
        return rightItems.Where(r => !hashLK.Contains(rightKeySelector(r))).Select(r => resultSelector(default(TLeft),r));
    }

    public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector)  where TLeft : class {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

    public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TRight), "c");
        var argLeft = Expression.PropertyOrField(parmP, "left");
        var newleftrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

        return leftItems.AsQueryable().GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg }).SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
    }

    public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TLeft), "c");
        var argRight = Expression.PropertyOrField(parmP, "right");
        var newrightrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

        return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
    }

    public static IQueryable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    private static Expression<Func<TP, TResult>> CastSBody<TP, TResult>(LambdaExpression ex, TP unusedP, TResult unusedRes) => (Expression<Func<TP, TResult>>)ex;

    public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLgR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
        var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
        var argRight = Expression.PropertyOrField(parmLgR, "right");
        var newrightrs = CastSBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));

        return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).Where(lgr => !lgr.leftg.Any()).Select(newrightrs);
    }

    public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }
}

右反半连接(Right Anti-Semi-Join)之间的差异在Linq to Objects或源代码中大多是没有意义的,但在最终的答案中,在服务器(SQL)端产生了差异,删除了一个不必要的连接。

手动编码Expression来处理将Expression<Func<>>合并到lambda中,可以使用LinqKit进行改进,但如果语言/编译器为此添加了一些帮助,那就更好了。为了完整起见,包括了fullouterjo模糊不清和RightOuterJoin函数,但我还没有重新实现FullOuterGroupJoin。

我为IEnumerable编写了另一个完整的外部连接版本,用于键是可排序的情况,这比组合左外部连接和右反半连接快50%左右,至少在小型集合上是这样。它只对每个集合进行一次排序。

我还为使用EF的版本添加了另一个答案,即用自定义展开替换Invoke。

其他回答

这是另一个完整的外部连接

由于对其他命题的简单性和可读性不太满意,我最后得出了这样的结论:

它没有快速的自命(在2020m CPU上加入1000 * 1000大约800毫秒:2.4ghz / 2核)。对我来说,它只是一个紧凑而随意的完全外部连接。

它的工作原理与SQL FULL OUTER JOIN相同(重复保存)

欢呼;-)

using System;
using System.Collections.Generic;
using System.Linq;
namespace NS
{
public static class DataReunion
{
    public static List<Tuple<T1, T2>> FullJoin<T1, T2, TKey>(List<T1> List1, Func<T1, TKey> KeyFunc1, List<T2> List2, Func<T2, TKey> KeyFunc2)
    {
        List<Tuple<T1, T2>> result = new List<Tuple<T1, T2>>();

        Tuple<TKey, T1>[] identifiedList1 = List1.Select(_ => Tuple.Create(KeyFunc1(_), _)).OrderBy(_ => _.Item1).ToArray();
        Tuple<TKey, T2>[] identifiedList2 = List2.Select(_ => Tuple.Create(KeyFunc2(_), _)).OrderBy(_ => _.Item1).ToArray();

        identifiedList1.Where(_ => !identifiedList2.Select(__ => __.Item1).Contains(_.Item1)).ToList().ForEach(_ => {
            result.Add(Tuple.Create<T1, T2>(_.Item2, default(T2)));
        });

        result.AddRange(
            identifiedList1.Join(identifiedList2, left => left.Item1, right => right.Item1, (left, right) => Tuple.Create<T1, T2>(left.Item2, right.Item2)).ToList()
        );

        identifiedList2.Where(_ => !identifiedList1.Select(__ => __.Item1).Contains(_.Item1)).ToList().ForEach(_ => {
            result.Add(Tuple.Create<T1, T2>(default(T1), _.Item2));
        });

        return result;
    }
}
}

这个想法是

基于提供的关键函数生成器构建id 处理仅剩下的项 流程内部连接 只处理正确的项目

下面是一个与之相关的简单测试:

在结束处放置断点,以手动验证它的行为是否符合预期

using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NS;

namespace Tests
{
[TestClass]
public class DataReunionTest
{
    [TestMethod]
    public void Test()
    {
        List<Tuple<Int32, Int32, String>> A = new List<Tuple<Int32, Int32, String>>();
        List<Tuple<Int32, Int32, String>> B = new List<Tuple<Int32, Int32, String>>();

        Random rnd = new Random();

        /* Comment the testing block you do not want to run
        /* Solution to test a wide range of keys*/

        for (int i = 0; i < 500; i += 1)
        {
            A.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "A"));
            B.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "B"));
        }

        /* Solution for essential testing*/

        A.Add(Tuple.Create(1, 2, "B11"));
        A.Add(Tuple.Create(1, 2, "B12"));
        A.Add(Tuple.Create(1, 3, "C11"));
        A.Add(Tuple.Create(1, 3, "C12"));
        A.Add(Tuple.Create(1, 3, "C13"));
        A.Add(Tuple.Create(1, 4, "D1"));

        B.Add(Tuple.Create(1, 1, "A21"));
        B.Add(Tuple.Create(1, 1, "A22"));
        B.Add(Tuple.Create(1, 1, "A23"));
        B.Add(Tuple.Create(1, 2, "B21"));
        B.Add(Tuple.Create(1, 2, "B22"));
        B.Add(Tuple.Create(1, 2, "B23"));
        B.Add(Tuple.Create(1, 3, "C2"));
        B.Add(Tuple.Create(1, 5, "E2"));

        Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key = (_) => Tuple.Create(_.Item1, _.Item2);

        var watch = System.Diagnostics.Stopwatch.StartNew();
        var res = DataReunion.FullJoin(A, key, B, key);
        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        String aser = JToken.FromObject(res).ToString(Formatting.Indented);
        Console.Write(elapsedMs);
    }
}

}

我猜@sehe的方法更强大,但在我更好地理解它之前,我发现自己跳过了@MichaelSander的扩展。我修改了它,以匹配这里描述的内置Enumerable.Join()方法的语法和返回类型。我在@JeffMercado的解决方案下为@cadrell0的注释添加了“distinct”后缀。

public static class MyExtensions {

    public static IEnumerable<TResult> FullJoinDistinct<TLeft, TRight, TKey, TResult> (
        this IEnumerable<TLeft> leftItems, 
        IEnumerable<TRight> rightItems, 
        Func<TLeft, TKey> leftKeySelector, 
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector
    ) {

        var leftJoin = 
            from left in leftItems
            join right in rightItems 
              on leftKeySelector(left) equals rightKeySelector(right) into temp
            from right in temp.DefaultIfEmpty()
            select resultSelector(left, right);

        var rightJoin = 
            from right in rightItems
            join left in leftItems 
              on rightKeySelector(right) equals leftKeySelector(left) into temp
            from left in temp.DefaultIfEmpty()
            select resultSelector(left, right);

        return leftJoin.Union(rightJoin);
    }

}

在这个例子中,你可以这样使用它:

var test = 
    firstNames
    .FullJoinDistinct(
        lastNames,
        f=> f.ID,
        j=> j.ID,
        (f,j)=> new {
            ID = f == null ? j.ID : f.ID, 
            leftName = f == null ? null : f.Name,
            rightName = j == null ? null : j.Name
        }
    );

在未来,随着我了解更多,我有一种感觉,我会迁移到@sehe的逻辑,因为它很受欢迎。但即使这样,我也必须小心,因为我觉得如果可行的话,至少有一个重载与现有的“.Join()”方法的语法匹配是很重要的,原因有两个:

方法的一致性有助于节省时间、避免错误和避免意外行为。 如果将来有一个开箱即用的“. fulljoin()”方法,我想它会尽量保持当前存在的“. join()”方法的语法。如果是这样,那么如果您想迁移到它,您可以简单地重命名函数,而不需要更改参数或担心不同的返回类型破坏您的代码。

对于泛型、扩展、Func语句和其他特性,我还是新手,所以当然欢迎反馈。

编辑:我没花很长时间就意识到我的代码有问题。我在LINQPad中做了一个. dump(),并查看返回类型。它只是IEnumerable,所以我试着匹配它。但是当我实际上在我的扩展上做了. where()或. select()时,我得到了一个错误:“系统集合。IEnumerable'不包含'Select'和…"的定义。因此,最后我能够匹配. join()的输入语法,但不能匹配返回行为。

编辑:将“TResult”添加到函数的返回类型中。在阅读微软的文章时错过了这一点,当然这是有道理的。有了这个修复,现在看来返回行为是符合我的目标毕竟。

下面是一个扩展方法:

public static IEnumerable<KeyValuePair<TLeft, TRight>> FullOuterJoin<TLeft, TRight>(this IEnumerable<TLeft> leftItems, Func<TLeft, object> leftIdSelector, IEnumerable<TRight> rightItems, Func<TRight, object> rightIdSelector)
{
    var leftOuterJoin = from left in leftItems
        join right in rightItems on leftIdSelector(left) equals rightIdSelector(right) into temp
        from right in temp.DefaultIfEmpty()
        select new { left, right };

    var rightOuterJoin = from right in rightItems
        join left in leftItems on rightIdSelector(right) equals leftIdSelector(left) into temp
        from left in temp.DefaultIfEmpty()
        select new { left, right };

    var fullOuterJoin = leftOuterJoin.Union(rightOuterJoin);

    return fullOuterJoin.Select(x => new KeyValuePair<TLeft, TRight>(x.left, x.right));
}

我认为其中大部分都存在问题,包括公认的答案,因为它们与Linq相比IQueryable工作不太好,要么是由于做了太多的服务器往返和太多的数据返回,要么是做了太多的客户端执行。

对于IEnumerable,我不喜欢Sehe的答案或类似的答案,因为它占用了过多的内存(一个简单的10000000两个列表测试在我的32GB机器上运行Linqpad内存)。

另外,其他大多数方法实际上并没有实现正确的Full Outer Join,因为它们使用的是带有右连接的Union,而不是带有右反半连接的Concat,这不仅消除了结果中重复的内部连接行,而且消除了最初存在于左或右数据中的任何正确的副本。

所以这里是我的扩展,处理所有这些问题,生成SQL以及直接实现LINQ到SQL的连接,在服务器上执行,比其他枚举对象更快,内存更少:

public static class Ext {
    public static IEnumerable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return from left in leftItems
               join right in rightItems on leftKeySelector(left) equals rightKeySelector(right) into temp
               from right in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    public static IEnumerable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return from right in rightItems
               join left in leftItems on rightKeySelector(right) equals leftKeySelector(left) into temp
               from left in temp.DefaultIfEmpty()
               select resultSelector(left, right);
    }

    public static IEnumerable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    public static IEnumerable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector) {

        var hashLK = new HashSet<TKey>(from l in leftItems select leftKeySelector(l));
        return rightItems.Where(r => !hashLK.Contains(rightKeySelector(r))).Select(r => resultSelector(default(TLeft),r));
    }

    public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IEnumerable<TLeft> leftItems,
        IEnumerable<TRight> rightItems,
        Func<TLeft, TKey> leftKeySelector,
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector)  where TLeft : class {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    private static Expression<Func<TP, TC, TResult>> CastSMBody<TP, TC, TResult>(LambdaExpression ex, TP unusedP, TC unusedC, TResult unusedRes) => (Expression<Func<TP, TC, TResult>>)ex;

    public static IQueryable<TResult> LeftOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLR = new { left = default(TLeft), rightg = default(IEnumerable<TRight>) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TRight), "c");
        var argLeft = Expression.PropertyOrField(parmP, "left");
        var newleftrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, parmC), parmP, parmC), sampleAnonLR, default(TRight), default(TResult));

        return leftItems.AsQueryable().GroupJoin(rightItems, leftKeySelector, rightKeySelector, (left, rightg) => new { left, rightg }).SelectMany(r => r.rightg.DefaultIfEmpty(), newleftrs);
    }

    public static IQueryable<TResult> RightOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmP = Expression.Parameter(sampleAnonLR.GetType(), "p");
        var parmC = Expression.Parameter(typeof(TLeft), "c");
        var argRight = Expression.PropertyOrField(parmP, "right");
        var newrightrs = CastSMBody(Expression.Lambda(Expression.Invoke(resultSelector, parmC, argRight), parmP, parmC), sampleAnonLR, default(TLeft), default(TResult));

        return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).SelectMany(l => l.leftg.DefaultIfEmpty(), newrightrs);
    }

    public static IQueryable<TResult> FullOuterJoinDistinct<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Union(leftItems.RightOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }

    private static Expression<Func<TP, TResult>> CastSBody<TP, TResult>(LambdaExpression ex, TP unusedP, TResult unusedRes) => (Expression<Func<TP, TResult>>)ex;

    public static IQueryable<TResult> RightAntiSemiJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        var sampleAnonLgR = new { leftg = default(IEnumerable<TLeft>), right = default(TRight) };
        var parmLgR = Expression.Parameter(sampleAnonLgR.GetType(), "lgr");
        var argLeft = Expression.Constant(default(TLeft), typeof(TLeft));
        var argRight = Expression.PropertyOrField(parmLgR, "right");
        var newrightrs = CastSBody(Expression.Lambda(Expression.Invoke(resultSelector, argLeft, argRight), parmLgR), sampleAnonLgR, default(TResult));

        return rightItems.GroupJoin(leftItems, rightKeySelector, leftKeySelector, (right, leftg) => new { leftg, right }).Where(lgr => !lgr.leftg.Any()).Select(newrightrs);
    }

    public static IQueryable<TResult> FullOuterJoin<TLeft, TRight, TKey, TResult>(
        this IQueryable<TLeft> leftItems,
        IQueryable<TRight> rightItems,
        Expression<Func<TLeft, TKey>> leftKeySelector,
        Expression<Func<TRight, TKey>> rightKeySelector,
        Expression<Func<TLeft, TRight, TResult>> resultSelector) {

        return leftItems.LeftOuterJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector).Concat(leftItems.RightAntiSemiJoin(rightItems, leftKeySelector, rightKeySelector, resultSelector));
    }
}

右反半连接(Right Anti-Semi-Join)之间的差异在Linq to Objects或源代码中大多是没有意义的,但在最终的答案中,在服务器(SQL)端产生了差异,删除了一个不必要的连接。

手动编码Expression来处理将Expression<Func<>>合并到lambda中,可以使用LinqKit进行改进,但如果语言/编译器为此添加了一些帮助,那就更好了。为了完整起见,包括了fullouterjo模糊不清和RightOuterJoin函数,但我还没有重新实现FullOuterGroupJoin。

我为IEnumerable编写了另一个完整的外部连接版本,用于键是可排序的情况,这比组合左外部连接和右反半连接快50%左右,至少在小型集合上是这样。它只对每个集合进行一次排序。

我还为使用EF的版本添加了另一个答案,即用自定义展开替换Invoke。

更新1:提供一个真正通用的扩展方法FullOuterJoin 更新2:可选地接受键类型的自定义IEqualityComparer 更新3:这个实现最近已经成为MoreLinq的一部分-谢谢大家!

编辑新增FullOuterGroupJoin (ideone)。我重用了GetOuter<>实现,使它的性能比它可能的要低一些,但我现在的目标是“高级”代码,而不是前沿优化。

请登录http://ideone.com/O36nWc观看直播

static void Main(string[] args)
{
    var ax = new[] { 
        new { id = 1, name = "John" },
        new { id = 2, name = "Sue" } };
    var bx = new[] { 
        new { id = 1, surname = "Doe" },
        new { id = 3, surname = "Smith" } };

    ax.FullOuterJoin(bx, a => a.id, b => b.id, (a, b, id) => new {a, b})
        .ToList().ForEach(Console.WriteLine);
}

打印输出:

{ a = { id = 1, name = John }, b = { id = 1, surname = Doe } }
{ a = { id = 2, name = Sue }, b =  }
{ a = , b = { id = 3, surname = Smith } }

您还可以提供默认值:http://ideone.com/kG4kqO

    ax.FullOuterJoin(
            bx, a => a.id, b => b.id, 
            (a, b, id) => new { a.name, b.surname },
            new { id = -1, name    = "(no firstname)" },
            new { id = -2, surname = "(no surname)" }
        )

印刷:

{ name = John, surname = Doe }
{ name = Sue, surname = (no surname) }
{ name = (no firstname), surname = Smith }

使用术语的解释:

连接是一个借用自关系数据库设计的术语:

只要b中有对应键的元素,连接就会重复A中的元素(即:如果b为空,则什么都没有)。数据库行话称之为内部连接(equi)。 外部连接包括没有对应的元素 元素存在于b中(即:如果b为空则结果为偶数)。这通常称为左连接。 一个完整的外部连接包括来自A和b的记录,如果另一个中不存在相应的元素。(即,如果a为空,则结果为偶数)

在RDBMS中不常见的是组连接[1]:

组连接与上面描述的相同,但它不是为多个对应的b重复A中的元素,而是用对应的键对记录进行分组。当您希望根据公共键枚举“已连接”记录时,这通常更方便。

另请参阅GroupJoin,其中还包含一些一般的背景说明。


[1](我相信Oracle和MSSQL对此有专有扩展)

完整代码

一个通用的“drop-in”扩展类

internal static class MyExtensions
{
    internal static IEnumerable<TResult> FullOuterGroupJoin<TA, TB, TKey, TResult>(
        this IEnumerable<TA> a,
        IEnumerable<TB> b,
        Func<TA, TKey> selectKeyA, 
        Func<TB, TKey> selectKeyB,
        Func<IEnumerable<TA>, IEnumerable<TB>, TKey, TResult> projection,
        IEqualityComparer<TKey> cmp = null)
    {
        cmp = cmp?? EqualityComparer<TKey>.Default;
        var alookup = a.ToLookup(selectKeyA, cmp);
        var blookup = b.ToLookup(selectKeyB, cmp);

        var keys = new HashSet<TKey>(alookup.Select(p => p.Key), cmp);
        keys.UnionWith(blookup.Select(p => p.Key));

        var join = from key in keys
                   let xa = alookup[key]
                   let xb = blookup[key]
                   select projection(xa, xb, key);

        return join;
    }

    internal static IEnumerable<TResult> FullOuterJoin<TA, TB, TKey, TResult>(
        this IEnumerable<TA> a,
        IEnumerable<TB> b,
        Func<TA, TKey> selectKeyA, 
        Func<TB, TKey> selectKeyB,
        Func<TA, TB, TKey, TResult> projection,
        TA defaultA = default(TA), 
        TB defaultB = default(TB),
        IEqualityComparer<TKey> cmp = null)
    {
        cmp = cmp?? EqualityComparer<TKey>.Default;
        var alookup = a.ToLookup(selectKeyA, cmp);
        var blookup = b.ToLookup(selectKeyB, cmp);

        var keys = new HashSet<TKey>(alookup.Select(p => p.Key), cmp);
        keys.UnionWith(blookup.Select(p => p.Key));

        var join = from key in keys
                   from xa in alookup[key].DefaultIfEmpty(defaultA)
                   from xb in blookup[key].DefaultIfEmpty(defaultB)
                   select projection(xa, xb, key);

        return join;
    }
}