SourceGenerator 生成db to class代码优化结果记录二

技术分享 11个月前 (08-04) 0 999+

关注

优化

在上一篇留下的 Dapper AOT 还有什么特别优化点的问题

在仔细阅读生成代码和源码之后，终于得到了答案

个人之前一直以为 Dapper AOT 只用了迭代器去实现，所以理应差不多的实现代码却有极大差距，思维陷入了僵局，一度以为有什么黑魔法

结果 Dapper AOT 没有用迭代器去实现!!! 靠北啦，还以为迭代器有新姿势可以优化了

不再使用迭代器

List<BenchmarkTest.Dog> results = new(); try {     while (reader.Read())     {         results.Add(ReadOne(reader, readOnlyTokens));     }     return results; }

当然就只能要求用户必须使用 AsList 方法，因为 ToList 会复制一份 list，导致负优化

像这样

 connection.Query<Dog>("select * from dog").AsList();

 // AsList implementation
 /// <summary>
 /// Returns <paramref name="source"/> as a <see cref="List{T}"/> without copying when it
 /// already is one; any other sequence is materialized through <c>Enumerable.ToList</c>.
 /// </summary>
 /// <param name="source">The sequence to expose as a list; may be null.</param>
 /// <returns>
 /// Null for a null input, the same instance for an existing list, otherwise a new list.
 /// </returns>
 public static List<T> AsList<T>(this IEnumerable<T>? source)
 {
     if (source is null)
     {
         return null!;
     }

     if (source is List<T> existing)
     {
         // Already a list: hand back the same instance, no copy.
         return existing;
     }

     return Enumerable.ToList(source);
 }

使用 span

再没有了迭代器方法限制， span 就可以放飞自我，随意使用了

public static BenchmarkTest.Dog ReadOne(this IDataReader reader, ref ReadOnlySpan<int> ss) {     var d = new BenchmarkTest.Dog();     for (int j = 0; j < ss.Length; j++)     {

使用 ArrayPool 减少内存占用

/// <summary>
/// Refreshes <c>FieldCount</c> from the current reader and returns a span of exactly that
/// many ints over the leased <c>Tokens</c> array, leasing a (bigger) array first if needed.
/// </summary>
/// <returns>A span of length <c>FieldCount</c> over the leased token array.</returns>
public Span<int> GetTokens()
{
    FieldCount = Reader!.FieldCount;

    // no leased array, or existing lease is not big enough; rent a new array
    bool needsNewLease = Tokens is null || Tokens.Length < FieldCount;
    if (needsNewLease)
    {
        if (Tokens is not null)
        {
            // Give the too-small lease back before taking a new one.
            ArrayPool<int>.Shared.Return(Tokens);
        }

        Tokens = ArrayPool<int>.Shared.Rent(FieldCount);
    }

    // Expose only the first FieldCount slots of the leased array.
    ref int first = ref MemoryMarshal.GetArrayDataReference(Tokens);
    return MemoryMarshal.CreateSpan(ref first, FieldCount);
}

数据小时使用栈分配

 // Readers with at most 64 fields get a stack-allocated token buffer sized to FieldCount;
 // wider readers fall back to state.GetTokens().
 var s = reader.FieldCount <= 64 ? MemoryMarshal.CreateSpan(ref MemoryMarshal.GetReference(stackalloc int[reader.FieldCount]), reader.FieldCount) :  state.GetTokens();

提前生成部分 hashcode 进行比较

因为比较现在也并不耗时了，所以缓存也没有必要了，也一并移除

/// <summary>
/// Fills <paramref name="s"/> with one token per reader column. The token is chosen from a
/// precomputed hash of the normalized column name and from whether the column's field type
/// equals the type compared in that branch.
/// </summary>
/// <param name="reader">Reader whose column names and field types are inspected.</param>
/// <param name="s">
/// Destination buffer; must have at least <c>reader.FieldCount</c> slots. Slot <c>i</c>
/// receives the token for column <c>i</c>.
/// </param>
/// <remarks>
/// Odd tokens (1, 3, 5) are written when the column type equals the compared type, even
/// tokens (2, 4, 6) otherwise. NOTE(review): the three hashes presumably correspond to the
/// Age / Name / Weight members (inferred from the compared types) - confirm against the
/// generator output.
/// </remarks>
public static void GenerateReadTokens(this IDataReader reader, Span<int> s)
{
    // FieldCount does not change while the tokens are generated; read it once.
    var fieldCount = reader.FieldCount;
    for (int i = 0; i < fieldCount; i++)
    {
        var name = reader.GetName(i);
        var type = reader.GetFieldType(i);
        switch (EntitiesGenerator.NormalizedHash(name))
        {
            case 742476188U:
                s[i] = type == typeof(int) ? 1 : 2;
                break;

            case 2369371622U:
                s[i] = type == typeof(string) ? 3 : 4;
                break;

            case 1352703673U:
                s[i] = type == typeof(float) ? 5 : 6;
                break;

            default:
                // The buffer may be backed by a pooled array, and ArrayPool does not clear
                // rented arrays. Without this write an unmatched column would keep whatever
                // token an earlier use left behind. 0 is the value a zero-initialized
                // buffer would hold here; assumes the row reader ignores token 0 - confirm.
                s[i] = 0;
                break;
        }
    }
}

性能测试说明

BenchmarkDotNet

这里特别说明一下

使用的 BenchmarkDotNet ，其本身已经考虑了 jit优化等等方面，有预热，超多次执行，

结果值也是按照统计学方法、考虑结果集分布情况后处理的：会移除偏差大的值（比如少数孤立的极大、极小值）；差异不大的情况下一般显示平均值，有较大差异时还会显示中位值

感兴趣的童鞋可以去 https://github.com/dotnet/BenchmarkDotNet 了解

Chloe 有点棘手，为了方便 mock，所以 copy 了部分源码，只比较实体映射部分

DapperAOT 和纯 dapper 很难一起运行，所以不再比较了，反正 dapper 肯定慢

测试数据

测试数据正如之前说过，采用手动 mock 方式，避免 db 驱动、db 执行、mock库等等带来的执行差异影响

class

非常简单的类，当然不能代表所有情况，不过简单测试够用了

/// <summary>
/// Simple entity with three settable properties, used as the mapping target.
/// </summary>
public class Dog
{
    /// <summary>Age; null when not set.</summary>
    public int? Age { get; set; }

    /// <summary>Name; null when not set.</summary>
    public string Name { get; set; }

    /// <summary>Weight; null when not set.</summary>
    public float? Weight { get; set; }
}

mock 数据

 /// <summary>
 /// Mock connection: every command it creates carries the configured <see cref="RowCount"/>.
 /// </summary>
 public class TestDbConnection : DbConnection
 {
     /// <summary>Number of rows handed on to created commands.</summary>
     public int RowCount { get; set; }

     /// <summary>Creates a <see cref="TestDbCommand"/> with the same row count.</summary>
     public IDbCommand CreateCommand() => new TestDbCommand { RowCount = RowCount };
 }

 /// <summary>
 /// Mock command: executing it yields a <see cref="TestDbDataReader"/> limited to
 /// <see cref="RowCount"/> rows.
 /// </summary>
 public class TestDbCommand : DbCommand
 {
     /// <summary>Number of rows handed on to the created reader.</summary>
     public int RowCount { get; set; }

     public IDataParameterCollection Parameters { get; } = new TestDataParameterCollection();

     public IDbDataParameter CreateParameter() => new TestDataParameter();

     protected override DbDataReader ExecuteDbDataReader(CommandBehavior behavior)
         => new TestDbDataReader { RowCount = RowCount };
 }

 /// <summary>
 /// Mock reader with three fixed columns: 0 = "Name" (string "XX"), 1 = "Age" (int 2),
 /// 2 = "Weight" (float 3.3f). Every row is identical; <see cref="Read"/> succeeds
 /// <see cref="RowCount"/> times.
 /// </summary>
 public class TestDbDataReader : DbDataReader
 {
     // Number of Read() calls made so far.
     private int calls = 0;

     /// <summary>Number of rows the reader reports before Read() returns false.</summary>
     public int RowCount { get; set; }

     /// <summary>Boxed value of the given column, or null for an unknown ordinal.</summary>
     public override object this[int ordinal] => ordinal switch
     {
         0 => (object)"XX",
         1 => 2,
         2 => 3.3f,
         _ => null,
     };

     public override int FieldCount => 3;

     /// <summary>CLR type of the given column, or null for an unknown ordinal.</summary>
     public override Type GetFieldType(int ordinal) => ordinal switch
     {
         0 => typeof(string),
         1 => typeof(int),
         2 => typeof(float),
         _ => null,
     };

     /// <summary>3.3f for column 2, otherwise 0.</summary>
     public override float GetFloat(int ordinal) => ordinal switch
     {
         2 => 3.3f,
         _ => 0,
     };

     /// <summary>2 for column 1, otherwise 0.</summary>
     public override int GetInt32(int ordinal) => ordinal switch
     {
         1 => 2,
         _ => 0,
     };

     /// <summary>Column name for the given ordinal, or null for an unknown ordinal.</summary>
     public override string GetName(int ordinal) => ordinal switch
     {
         0 => "Name",
         1 => "Age",
         2 => "Weight",
         _ => null,
     };

     /// <summary>"XX" for column 0, otherwise null.</summary>
     public override string GetString(int ordinal) => ordinal switch
     {
         0 => "XX",
         _ => null,
     };

     /// <summary>Boxed value of the given column, or null for an unknown ordinal.</summary>
     public override object GetValue(int ordinal) => ordinal switch
     {
         0 => (object)"XX",
         1 => 2,
         2 => 3.3f,
         _ => null,
     };

     /// <summary>Advances the call counter; true for the first RowCount calls.</summary>
     public override bool Read() => ++calls <= RowCount;
 }

Benchmark 代码

    /// <summary>
    /// Benchmarks that map rows from the mock reader into <c>Dog</c> objects through four
    /// paths: hand-written assignment (baseline), Dapper AOT, the source generator, and Chloe.
    /// </summary>
    [MemoryDiagnoser]
    [Orderer(summaryOrderPolicy: SummaryOrderPolicy.FastestToSlowest)]
    [GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
    [CategoriesColumn]
    public class ObjectMappingTest
    {
        /// <summary>Number of rows the mock reader produces per benchmark invocation.</summary>
        [Params(1, 1000, 10000, 100000, 1000000)]
        public int RowCount { get; set; }

        /// <summary>Baseline: hand-written property assignment from typed getters.</summary>
        [Benchmark(Baseline = true)]
        public void SetClass()
        {
            var connection = new TestDbConnection { RowCount = RowCount };
            var dogs = new List<Dog>();
            try
            {
                connection.Open();
                var command = connection.CreateCommand();
                command.CommandText = "select ";
                using var reader = command.ExecuteReader(CommandBehavior.Default);
                while (reader.Read())
                {
                    dogs.Add(new Dog
                    {
                        Name = reader.GetString(0),
                        Age = reader.GetInt32(1),
                        Weight = reader.GetFloat(2),
                    });
                }
            }
            finally
            {
                connection.Close();
            }
        }

        /// <summary>Maps through the Query call and materializes with AsList.</summary>
        [Benchmark]
        public void DapperAOT()
        {
            var connection = new TestDbConnection { RowCount = RowCount };
            var rows = connection.Query<Dog>("select * from dog");
            var dogs = rows.AsList();
        }

        /// <summary>Maps through the ReadTo reader extension.</summary>
        [Benchmark]
        public void SourceGenerator()
        {
            var connection = new TestDbConnection { RowCount = RowCount };
            List<Dog> dogs;
            try
            {
                connection.Open();
                var command = connection.CreateCommand();
                command.CommandText = "select ";
                using var reader = command.ExecuteReader(CommandBehavior.Default);
                dogs = reader.ReadTo<Dog>().AsList();
            }
            finally
            {
                connection.Close();
            }
        }

        /// <summary>Maps through InternalSqlQuery.</summary>
        [Benchmark]
        public void Chloe()
        {
            var connection = new TestDbConnection { RowCount = RowCount };
            try
            {
                connection.Open();
                var command = connection.CreateCommand();
                var query = new InternalSqlQuery<Dog>(command, "select");
                var dogs = query.AsList();
            }
            finally
            {
                connection.Close();
            }
        }
    }

完整代码可以参考 https://github.com/fs7744/SlowestEM

测试结果

 BenchmarkDotNet v0.13.12, Windows 10 (10.0.19045.4651/22H2/2022Update) Intel Core i7-10700 CPU 2.90GHz, 1 CPU, 16 logical and 8 physical cores .NET SDK 9.0.100-preview.5.24307.3   [Host]     : .NET 8.0.6 (8.0.624.26715), X64 RyuJIT AVX2   DefaultJob : .NET 8.0.6 (8.0.624.26715), X64 RyuJIT AVX2

Method	RowCount	Mean	Error	StdDev	Ratio	RatioSD	Gen0	Gen1	Gen2	Allocated	Alloc Ratio
DapperAOT	1	446.3 ns	8.81 ns	8.65 ns	0.60	0.03	0.0525	0.0515	-	440 B	1.00
SourceGenerator	1	690.0 ns	13.72 ns	32.34 ns	0.95	0.07	0.0525	0.0515	-	440 B	1.00
SetClass	1	728.3 ns	14.59 ns	37.41 ns	1.00	0.00	0.0525	0.0515	-	440 B	1.00
Chloe	1	909.7 ns	17.49 ns	22.75 ns	1.25	0.06	0.1020	0.1011	-	856 B	1.95

SetClass	1000	8,593.3 ns	169.90 ns	390.38 ns	1.00	0.00	6.7902	1.6937	-	56912 B	1.00
SourceGenerator	1000	16,967.8 ns	310.02 ns	258.88 ns	1.91	0.08	6.7749	1.6785	-	56912 B	1.00
DapperAOT	1000	18,299.7 ns	267.72 ns	250.43 ns	2.06	0.09	6.7749	1.3428	-	56912 B	1.00
Chloe	1000	116,049.4 ns	297.71 ns	263.91 ns	13.06	0.54	6.8359	1.7090	-	57328 B	1.01

SetClass	10000	309,255.1 ns	3,945.26 ns	3,294.47 ns	1.00	0.00	83.0078	82.5195	41.5039	662782 B	1.00
DapperAOT	10000	402,700.7 ns	7,676.45 ns	7,180.56 ns	1.31	0.03	83.0078	82.5195	41.5039	662782 B	1.00
SourceGenerator	10000	414,226.2 ns	8,149.22 ns	10,007.97 ns	1.34	0.04	83.0078	82.5195	41.5039	662782 B	1.00
Chloe	10000	1,453,166.1 ns	19,660.10 ns	17,428.16 ns	4.70	0.07	82.0313	80.0781	41.0156	663199 B	1.00

SetClass	100000	2,176,860.4 ns	42,449.84 ns	63,536.93 ns	1.00	0.00	496.0938	496.0938	496.0938	6098015 B	1.00
SourceGenerator	100000	3,045,760.4 ns	59,378.23 ns	63,534.04 ns	1.39	0.05	496.0938	496.0938	496.0938	6098015 B	1.00
DapperAOT	100000	3,053,510.0 ns	35,015.61 ns	29,239.62 ns	1.40	0.04	496.0938	496.0938	496.0938	6098015 B	1.00
Chloe	100000	13,152,653.6 ns	65,400.49 ns	51,060.40 ns	6.02	0.14	484.3750	484.3750	484.3750	6098433 B	1.00

SetClass	1000000	105,420,410.0 ns	2,093,734.23 ns	3,380,990.50 ns	1.00	0.00	6800.0000	6800.0000	2200.0000	56780029 B	1.00
SourceGenerator	1000000	115,534,043.8 ns	1,828,036.86 ns	1,795,376.62 ns	1.09	0.03	6800.0000	6800.0000	2200.0000	56780118 B	1.00
DapperAOT	1000000	115,751,485.5 ns	2,120,239.39 ns	2,603,844.38 ns	1.10	0.04	6800.0000	6800.0000	2200.0000	56780029 B	1.00
Chloe	1000000	208,295,919.3 ns	4,031,590.18 ns	4,481,101.81 ns	1.97	0.06	6666.6667	6666.6667	2333.3333	56781907 B	1.00