我有一份人们的身份证和名字的名单,还有一份人们的身份证和姓氏的名单。有些人没有名字,有些人没有姓;我想在这两个列表上做一个完整的外部连接。

下面列出:

ID  FirstName
--  ---------
 1  John
 2  Sue

ID  LastName
--  --------
 1  Doe
 3  Smith

应该生产:

ID  FirstName  LastName
--  ---------  --------
 1  John       Doe
 2  Sue
 3             Smith

我已经发现了相当多的解决方案的“LINQ外部连接”,它们看起来都很相似,但似乎真的是离开外部连接。

到目前为止我的尝试是这样的:

private void OuterJoinTest()
{
    List<FirstName> firstNames = new List<FirstName>();
    firstNames.Add(new FirstName { ID = 1, Name = "John" });
    firstNames.Add(new FirstName { ID = 2, Name = "Sue" });

    List<LastName> lastNames = new List<LastName>();
    lastNames.Add(new LastName { ID = 1, Name = "Doe" });
    lastNames.Add(new LastName { ID = 3, Name = "Smith" });

    var outerJoin = from first in firstNames
        join last in lastNames
        on first.ID equals last.ID
        into temp
        from last in temp.DefaultIfEmpty()
        select new
        {
            id = first != null ? first.ID : last.ID,
            firstname = first != null ? first.Name : string.Empty,
            surname = last != null ? last.Name : string.Empty
        };
    }
}

public class FirstName
{
    public int ID;
    
    public string Name;
}
    
public class LastName
{
    public int ID;
    
    public string Name;
}

但结果是:

ID  FirstName  LastName
--  ---------  --------
 1  John       Doe
 2  Sue

我做错了什么?


当前回答

在两个输入上执行内存流枚举,并为每一行调用选择器。如果在当前迭代中没有相关性,则选择器参数之一将为空。

例子:

   var result = left.FullOuterJoin(
         right, 
         x=>left.Key, 
         x=>right.Key, 
         (l,r) => new { LeftKey = l?.Key, RightKey=r?.Key });

Requires an IComparer for the correlation type, uses the Comparer.Default if not provided. Requires that 'OrderBy' is applied to the input enumerables /// <summary> /// Performs a full outer join on two <see cref="IEnumerable{T}" />. /// </summary> /// <typeparam name="TLeft"></typeparam> /// <typeparam name="TValue"></typeparam> /// <typeparam name="TRight"></typeparam> /// <typeparam name="TResult"></typeparam> /// <param name="left"></param> /// <param name="right"></param> /// <param name="leftKeySelector"></param> /// <param name="rightKeySelector"></param> /// <param name="selector">Expression defining result type</param> /// <param name="keyComparer">A comparer if there is no default for the type</param> /// <returns></returns> [System.Diagnostics.DebuggerStepThrough] public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TValue, TResult>( this IEnumerable<TLeft> left, IEnumerable<TRight> right, Func<TLeft, TValue> leftKeySelector, Func<TRight, TValue> rightKeySelector, Func<TLeft, TRight, TResult> selector, IComparer<TValue> keyComparer = null) where TLeft: class where TRight: class where TValue : IComparable { keyComparer = keyComparer ?? Comparer<TValue>.Default; using (var enumLeft = left.OrderBy(leftKeySelector).GetEnumerator()) using (var enumRight = right.OrderBy(rightKeySelector).GetEnumerator()) { var hasLeft = enumLeft.MoveNext(); var hasRight = enumRight.MoveNext(); while (hasLeft || hasRight) { var currentLeft = enumLeft.Current; var valueLeft = hasLeft ? leftKeySelector(currentLeft) : default(TValue); var currentRight = enumRight.Current; var valueRight = hasRight ? rightKeySelector(currentRight) : default(TValue); int compare = !hasLeft ? 1 : !hasRight ? -1 : keyComparer.Compare(valueLeft, valueRight); switch (compare) { case 0: // The selector matches. An inner join is achieved yield return selector(currentLeft, currentRight); hasLeft = enumLeft.MoveNext(); hasRight = enumRight.MoveNext(); break; case -1: yield return selector(currentLeft, default(TRight)); hasLeft = enumLeft.MoveNext(); break; case 1: yield return selector(default(TLeft), currentRight); hasRight = enumRight.MoveNext(); break; } } } }

其他回答

我认为LINQ join子句并不是这个问题的正确解决方案,因为join子句的目的并不是按照这个任务解决方案所需的方式来积累数据。合并创建的独立集合的代码变得太复杂了,也许这对于学习目的来说是可以的,但对于真正的应用程序来说不是。解决这个问题的方法之一是下面的代码:

class Program
{
    static void Main(string[] args)
    {
        List<FirstName> firstNames = new List<FirstName>();
        firstNames.Add(new FirstName { ID = 1, Name = "John" });
        firstNames.Add(new FirstName { ID = 2, Name = "Sue" });

        List<LastName> lastNames = new List<LastName>();
        lastNames.Add(new LastName { ID = 1, Name = "Doe" });
        lastNames.Add(new LastName { ID = 3, Name = "Smith" });

        HashSet<int> ids = new HashSet<int>();
        foreach (var name in firstNames)
        {
            ids.Add(name.ID);
        }
        foreach (var name in lastNames)
        {
            ids.Add(name.ID);
        }
        List<FullName> fullNames = new List<FullName>();
        foreach (int id in ids)
        {
            FullName fullName = new FullName();
            fullName.ID = id;
            FirstName firstName = firstNames.Find(f => f.ID == id);
            fullName.FirstName = firstName != null ? firstName.Name : string.Empty;
            LastName lastName = lastNames.Find(l => l.ID == id);
            fullName.LastName = lastName != null ? lastName.Name : string.Empty;
            fullNames.Add(fullName);
        }
    }
}
public class FirstName
{
    public int ID;

    public string Name;
}

public class LastName
{
    public int ID;

    public string Name;
}
class FullName
{
    public int ID;

    public string FirstName;

    public string LastName;
}

如果真正的集合对于HashSet的形成很大,而不是foreach循环可以使用下面的代码:

List<int> firstIds = firstNames.Select(f => f.ID).ToList();
List<int> LastIds = lastNames.Select(l => l.ID).ToList();
HashSet<int> ids = new HashSet<int>(firstIds.Union(LastIds));//Only unique IDs will be included in HashSet

我大约在6年前为一个应用程序编写了这个扩展类,并且从那时起一直在许多解决方案中使用它,没有任何问题。希望能有所帮助。

编辑:我注意到有些人可能不知道如何使用扩展类。

要使用此扩展类,只需在类中添加以下行引用其名称空间 使用joinext;

^这应该允许你在任何你碰巧使用的IEnumerable对象集合上看到扩展函数的智能感知。

希望这能有所帮助。如果仍然不清楚,请告诉我,我希望写一个关于如何使用它的示例。

下面是这个类:

namespace joinext
{    
public static class JoinExtensions
    {
        public static IEnumerable<TResult> FullOuterJoin<TOuter, TInner, TKey, TResult>(
            this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
            where TInner : class
            where TOuter : class
        {
            var innerLookup = inner.ToLookup(innerKeySelector);
            var outerLookup = outer.ToLookup(outerKeySelector);

            var innerJoinItems = inner
                .Where(innerItem => !outerLookup.Contains(innerKeySelector(innerItem)))
                .Select(innerItem => resultSelector(null, innerItem));

            return outer
                .SelectMany(outerItem =>
                {
                    var innerItems = innerLookup[outerKeySelector(outerItem)];

                    return innerItems.Any() ? innerItems : new TInner[] { null };
                }, resultSelector)
                .Concat(innerJoinItems);
        }


        public static IEnumerable<TResult> LeftJoin<TOuter, TInner, TKey, TResult>(
            this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
        {
            return outer.GroupJoin(
                inner,
                outerKeySelector,
                innerKeySelector,
                (o, i) =>
                    new { o = o, i = i.DefaultIfEmpty() })
                    .SelectMany(m => m.i.Select(inn =>
                        resultSelector(m.o, inn)
                        ));

        }



        public static IEnumerable<TResult> RightJoin<TOuter, TInner, TKey, TResult>(
            this IEnumerable<TOuter> outer,
            IEnumerable<TInner> inner,
            Func<TOuter, TKey> outerKeySelector,
            Func<TInner, TKey> innerKeySelector,
            Func<TOuter, TInner, TResult> resultSelector)
        {
            return inner.GroupJoin(
                outer,
                innerKeySelector,
                outerKeySelector,
                (i, o) =>
                    new { i = i, o = o.DefaultIfEmpty() })
                    .SelectMany(m => m.o.Select(outt =>
                        resultSelector(outt, m.i)
                        ));

        }

    }
}

我猜@sehe的方法更强大,但在我更好地理解它之前,我发现自己跳过了@MichaelSander的扩展。我修改了它,以匹配这里描述的内置Enumerable.Join()方法的语法和返回类型。我在@JeffMercado的解决方案下为@cadrell0的注释添加了“distinct”后缀。

public static class MyExtensions {

    public static IEnumerable<TResult> FullJoinDistinct<TLeft, TRight, TKey, TResult> (
        this IEnumerable<TLeft> leftItems, 
        IEnumerable<TRight> rightItems, 
        Func<TLeft, TKey> leftKeySelector, 
        Func<TRight, TKey> rightKeySelector,
        Func<TLeft, TRight, TResult> resultSelector
    ) {

        var leftJoin = 
            from left in leftItems
            join right in rightItems 
              on leftKeySelector(left) equals rightKeySelector(right) into temp
            from right in temp.DefaultIfEmpty()
            select resultSelector(left, right);

        var rightJoin = 
            from right in rightItems
            join left in leftItems 
              on rightKeySelector(right) equals leftKeySelector(left) into temp
            from left in temp.DefaultIfEmpty()
            select resultSelector(left, right);

        return leftJoin.Union(rightJoin);
    }

}

在这个例子中,你可以这样使用它:

var test = 
    firstNames
    .FullJoinDistinct(
        lastNames,
        f=> f.ID,
        j=> j.ID,
        (f,j)=> new {
            ID = f == null ? j.ID : f.ID, 
            leftName = f == null ? null : f.Name,
            rightName = j == null ? null : j.Name
        }
    );

在未来,随着我了解更多,我有一种感觉,我会迁移到@sehe的逻辑,因为它很受欢迎。但即使这样,我也必须小心,因为我觉得如果可行的话,至少有一个重载与现有的“.Join()”方法的语法匹配是很重要的,原因有两个:

方法的一致性有助于节省时间、避免错误和避免意外行为。 如果将来有一个开箱即用的“. fulljoin()”方法,我想它会尽量保持当前存在的“. join()”方法的语法。如果是这样,那么如果您想迁移到它,您可以简单地重命名函数,而不需要更改参数或担心不同的返回类型破坏您的代码。

对于泛型、扩展、Func语句和其他特性,我还是新手,所以当然欢迎反馈。

编辑:我没花很长时间就意识到我的代码有问题。我在LINQPad中做了一个. dump(),并查看返回类型。它只是IEnumerable,所以我试着匹配它。但是当我实际上在我的扩展上做了. where()或. select()时,我得到了一个错误:“系统集合。IEnumerable'不包含'Select'和…"的定义。因此,最后我能够匹配. join()的输入语法,但不能匹配返回行为。

编辑:将“TResult”添加到函数的返回类型中。在阅读微软的文章时错过了这一点,当然这是有道理的。有了这个修复,现在看来返回行为是符合我的目标毕竟。

在两个输入上执行内存流枚举,并为每一行调用选择器。如果在当前迭代中没有相关性,则选择器参数之一将为空。

例子:

   var result = left.FullOuterJoin(
         right, 
         x=>left.Key, 
         x=>right.Key, 
         (l,r) => new { LeftKey = l?.Key, RightKey=r?.Key });

Requires an IComparer for the correlation type, uses the Comparer.Default if not provided. Requires that 'OrderBy' is applied to the input enumerables /// <summary> /// Performs a full outer join on two <see cref="IEnumerable{T}" />. /// </summary> /// <typeparam name="TLeft"></typeparam> /// <typeparam name="TValue"></typeparam> /// <typeparam name="TRight"></typeparam> /// <typeparam name="TResult"></typeparam> /// <param name="left"></param> /// <param name="right"></param> /// <param name="leftKeySelector"></param> /// <param name="rightKeySelector"></param> /// <param name="selector">Expression defining result type</param> /// <param name="keyComparer">A comparer if there is no default for the type</param> /// <returns></returns> [System.Diagnostics.DebuggerStepThrough] public static IEnumerable<TResult> FullOuterJoin<TLeft, TRight, TValue, TResult>( this IEnumerable<TLeft> left, IEnumerable<TRight> right, Func<TLeft, TValue> leftKeySelector, Func<TRight, TValue> rightKeySelector, Func<TLeft, TRight, TResult> selector, IComparer<TValue> keyComparer = null) where TLeft: class where TRight: class where TValue : IComparable { keyComparer = keyComparer ?? Comparer<TValue>.Default; using (var enumLeft = left.OrderBy(leftKeySelector).GetEnumerator()) using (var enumRight = right.OrderBy(rightKeySelector).GetEnumerator()) { var hasLeft = enumLeft.MoveNext(); var hasRight = enumRight.MoveNext(); while (hasLeft || hasRight) { var currentLeft = enumLeft.Current; var valueLeft = hasLeft ? leftKeySelector(currentLeft) : default(TValue); var currentRight = enumRight.Current; var valueRight = hasRight ? rightKeySelector(currentRight) : default(TValue); int compare = !hasLeft ? 1 : !hasRight ? -1 : keyComparer.Compare(valueLeft, valueRight); switch (compare) { case 0: // The selector matches. An inner join is achieved yield return selector(currentLeft, currentRight); hasLeft = enumLeft.MoveNext(); hasRight = enumRight.MoveNext(); break; case -1: yield return selector(currentLeft, default(TRight)); hasLeft = enumLeft.MoveNext(); break; case 1: yield return selector(default(TLeft), currentRight); hasRight = enumRight.MoveNext(); break; } } } }

这是另一个完整的外部连接

由于对其他命题的简单性和可读性不太满意,我最后得出了这样的结论:

它没有快速的自命(在2020m CPU上加入1000 * 1000大约800毫秒:2.4ghz / 2核)。对我来说,它只是一个紧凑而随意的完全外部连接。

它的工作原理与SQL FULL OUTER JOIN相同(重复保存)

欢呼;-)

using System;
using System.Collections.Generic;
using System.Linq;
namespace NS
{
public static class DataReunion
{
    public static List<Tuple<T1, T2>> FullJoin<T1, T2, TKey>(List<T1> List1, Func<T1, TKey> KeyFunc1, List<T2> List2, Func<T2, TKey> KeyFunc2)
    {
        List<Tuple<T1, T2>> result = new List<Tuple<T1, T2>>();

        Tuple<TKey, T1>[] identifiedList1 = List1.Select(_ => Tuple.Create(KeyFunc1(_), _)).OrderBy(_ => _.Item1).ToArray();
        Tuple<TKey, T2>[] identifiedList2 = List2.Select(_ => Tuple.Create(KeyFunc2(_), _)).OrderBy(_ => _.Item1).ToArray();

        identifiedList1.Where(_ => !identifiedList2.Select(__ => __.Item1).Contains(_.Item1)).ToList().ForEach(_ => {
            result.Add(Tuple.Create<T1, T2>(_.Item2, default(T2)));
        });

        result.AddRange(
            identifiedList1.Join(identifiedList2, left => left.Item1, right => right.Item1, (left, right) => Tuple.Create<T1, T2>(left.Item2, right.Item2)).ToList()
        );

        identifiedList2.Where(_ => !identifiedList1.Select(__ => __.Item1).Contains(_.Item1)).ToList().ForEach(_ => {
            result.Add(Tuple.Create<T1, T2>(default(T1), _.Item2));
        });

        return result;
    }
}
}

这个想法是

基于提供的关键函数生成器构建id 处理仅剩下的项 流程内部连接 只处理正确的项目

下面是一个与之相关的简单测试:

在结束处放置断点,以手动验证它的行为是否符合预期

using System;
using System.Collections.Generic;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using NS;

namespace Tests
{
[TestClass]
public class DataReunionTest
{
    [TestMethod]
    public void Test()
    {
        List<Tuple<Int32, Int32, String>> A = new List<Tuple<Int32, Int32, String>>();
        List<Tuple<Int32, Int32, String>> B = new List<Tuple<Int32, Int32, String>>();

        Random rnd = new Random();

        /* Comment the testing block you do not want to run
        /* Solution to test a wide range of keys*/

        for (int i = 0; i < 500; i += 1)
        {
            A.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "A"));
            B.Add(Tuple.Create(rnd.Next(1, 101), rnd.Next(1, 101), "B"));
        }

        /* Solution for essential testing*/

        A.Add(Tuple.Create(1, 2, "B11"));
        A.Add(Tuple.Create(1, 2, "B12"));
        A.Add(Tuple.Create(1, 3, "C11"));
        A.Add(Tuple.Create(1, 3, "C12"));
        A.Add(Tuple.Create(1, 3, "C13"));
        A.Add(Tuple.Create(1, 4, "D1"));

        B.Add(Tuple.Create(1, 1, "A21"));
        B.Add(Tuple.Create(1, 1, "A22"));
        B.Add(Tuple.Create(1, 1, "A23"));
        B.Add(Tuple.Create(1, 2, "B21"));
        B.Add(Tuple.Create(1, 2, "B22"));
        B.Add(Tuple.Create(1, 2, "B23"));
        B.Add(Tuple.Create(1, 3, "C2"));
        B.Add(Tuple.Create(1, 5, "E2"));

        Func<Tuple<Int32, Int32, String>, Tuple<Int32, Int32>> key = (_) => Tuple.Create(_.Item1, _.Item2);

        var watch = System.Diagnostics.Stopwatch.StartNew();
        var res = DataReunion.FullJoin(A, key, B, key);
        watch.Stop();
        var elapsedMs = watch.ElapsedMilliseconds;
        String aser = JToken.FromObject(res).ToString(Formatting.Indented);
        Console.Write(elapsedMs);
    }
}

}