EF Core AsSplitQuery Demo

Key idea - there are times when it will be much better to perform joins on client side rather than on database side.

Imagine following scenario - you gonna transfer touthands of rows from database to client. And each ro will join some dictionary which has only few values.

With that in place - if we download that dictionary to client and join on client side we will remove thousands of same bytes need to be transfered.

Technicaly it will be hash map join.

Model Generation

With help of ef core packages we can generate bunch of code instead of writing it manually

dotnet add package Microsoft.EntityFrameworkCore.SqlServer
dotnet add package Microsoft.EntityFrameworkCore.Design
dotnet tool install --global dotnet-ef

dotnet ef dbcontext scaffold "Server=localhost;Initial Catalog=Demo;User ID=sa;Password=*******" Microsoft.EntityFrameworkCore.SqlServer -o Model

Alternative approach to generate only few tables:

dotnet ef dbcontext scaffold "Server=localhost;Initial Catalog=Demo;User ID=sa;Password=*******" Microsoft.EntityFrameworkCore.SqlServer -o Demo -t City -t Vacancy

Sample app

using System.Diagnostics;
using System.Text.Json;
using AsSplitQuery;
using Microsoft.AspNetCore.Mvc;
using Microsoft.Data.SqlClient;
using Microsoft.EntityFrameworkCore;

var builder = WebApplication.CreateBuilder(args);

builder.Services.AddDbContext<Database>(optionsBuilder => optionsBuilder.UseSqlServer("Server=localhost;Initial Catalog=Demo;User ID=sa;Password=*******"));

var app = builder.Build();

app.MapGet("/", ([FromServices] Database db, [FromServices]ILogger<Database> logger) =>
{
    var timer = Stopwatch.StartNew();
    var counter = 0;

    var con = db.Database.GetDbConnection() as SqlConnection;
    con.StatisticsEnabled = true;

    var rows = db
        .Resumes.AsNoTracking()
        .Include(r => r.ResumeEducations)
        .Include(r => r.ResumeExperiences)
        .OrderBy(r => r.Id)
        .Take(10000)
        .Select(r => new
        {
            r.Speciality,
            Educations = r.ResumeEducations.Select(e => e.Name),
            Experiences = r.ResumeExperiences.Select(e => e.Position)
        })
        // .AsSplitQuery() // <- POI
        ;

    foreach (var row in rows)
    {
        counter += 1;
        if (counter % 1000 == 0)
        {
            logger.LogInformation("read {Counter} rows in {Elapsed}", counter, timer.Elapsed);
        }
    }

    var stats = con.RetrieveStatistics();
    logger.LogInformation("{Stats}", JsonSerializer.Serialize(stats));

    return $"read {counter} rows in {timer.Elapsed}\n";
});

app.Run();

Single query

read 10000 rows in 00:01:17.9403316

{
  "BuffersReceived": 2280,
  "BuffersSent": 1,
  "BytesReceived": 18238809,
  "BytesSent": 821,
  "CursorOpens": 0,
  "IduCount": 0,
  "IduRows": 0,
  "PreparedExecs": 0,
  "Prepares": 0,
  "SelectCount": 1,
  "SelectRows": 153288,
  "ServerRoundtrips": 1,
  "SumResultSets": 1,
  "Transactions": 0,
  "UnpreparedExecs": 1,
  "ConnectionTime": 75817,
  "ExecutionTime": 17201,
  "NetworkServerTime": 60
}
-- Executed DbCommand (83ms) [Parameters=[@__p_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
      SELECT [t].[Speciality], [t].[Id], [r0].[Name], [r0].[Id], [r1].[Position], [r1].[Id]
      FROM (
          SELECT TOP(@__p_0) [r].[Speciality], [r].[Id]
          FROM [Resume] AS [r]
          ORDER BY [r].[Id]
      ) AS [t]
      LEFT JOIN [ResumeEducation] AS [r0] ON [t].[Id] = [r0].[ResumeId]
      LEFT JOIN [ResumeExperience] AS [r1] ON [t].[Id] = [r1].[ResumeId]
      ORDER BY [t].[Id], [r0].[Id]

Split query

read 10000 rows in 00:00:08.9613671

{
  "BuffersReceived": 417,
  "BuffersSent": 3,
  "BytesReceived": 3324158,
  "BytesSent": 1313,
  "CursorOpens": 0,
  "IduCount": 0,
  "IduRows": 0,
  "PreparedExecs": 0,
  "Prepares": 0,
  "SelectCount": 3,
  "SelectRows": 87633,
  "ServerRoundtrips": 3,
  "SumResultSets": 2,
  "Transactions": 0,
  "UnpreparedExecs": 3,
  "ConnectionTime": 6892,
  "ExecutionTime": 1661,
  "NetworkServerTime": 282
}
      -- Executed DbCommand (130ms) [Parameters=[@__p_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
      SELECT TOP(@__p_0) [r].[Speciality], [r].[Id]
      FROM [Resume] AS [r]
      ORDER BY [r].[Id]

      -- Executed DbCommand (72ms) [Parameters=[@__p_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
      SELECT [r0].[Name], [t].[Id]
      FROM (
          SELECT TOP(@__p_0) [r].[Id]
          FROM [Resume] AS [r]
          ORDER BY [r].[Id]
      ) AS [t]
      INNER JOIN [ResumeEducation] AS [r0] ON [t].[Id] = [r0].[ResumeId]
      ORDER BY [t].[Id]

      -- Executed DbCommand (105ms) [Parameters=[@__p_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
      SELECT [r0].[Position], [t].[Id]
      FROM (
          SELECT TOP(@__p_0) [r].[Id]
          FROM [Resume] AS [r]
          ORDER BY [r].[Id]
      ) AS [t]
      INNER JOIN [ResumeExperience] AS [r0] ON [t].[Id] = [r0].[ResumeId]
      ORDER BY [t].[Id]