article: add more discussion about struct size

golang-design · Nov 5, 2020 · 77a03a0 · 77a03a0
1 parent 7721772
commit 77a03a0
Show file tree

Hide file tree

Showing 9 changed files with 1,318 additions and 1 deletion.
diff --git a/pointer-params.md b/pointer-params.md
@@ -2,7 +2,7 @@
 
 Author(s): [Changkun Ou](https://changkun.de)
 
-Last updated: 2020-10-27
+Last updated: 2020-11-05
 
 ## Introduction
 
@@ -213,6 +213,212 @@ copies the parameter to different registers (e.g., copy pointers to AX and CX),
 then write back when returning. Therefore, with inline disabled, the reason that `addv` 
 is slower than `addp` is caused by different memory access pattern.
 
+## Conclusion
+
+Is pass by value always faster than pass by pointer? We could do a further test.
+But this time, we need to use a generator to generate all possible cases. Here
+is how we could do it:
+
+```go
+// gen.go
+
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/format"
+	"io/ioutil"
+	"strings"
+	"text/template"
+)
+
+var ( // Text fragments and templates from which main assembles the generated test file.
+	head = `// Code generated by go run gen.go; DO NOT EDIT.
+package fields_test
+
+import "testing"
+` // head is written first: generated-code notice, package clause and the testing import
+	structTmpl = template.Must(template.New("ss").Parse(`
+type {{.Name}} struct {
+	{{.Properties}}
+}
+
+func (s {{.Name}}) addv(ss {{.Name}}) {{.Name}} {
+	return {{.Name}}{
+		{{.Addv}}
+	}
+}
+
+func (s *{{.Name}}) addp(ss *{{.Name}}) *{{.Name}} {
+	{{.Addp}}
+	return s
+}
+`)) // structTmpl renders one struct type plus its value-receiver addv and pointer-receiver addp methods
+	benchHead = `func BenchmarkVec(b *testing.B) {` // opens the single benchmark function that holds every sub-benchmark
+	benchTail = `}` // closes BenchmarkVec
+	benchBody = template.Must(template.New("bench").Parse(`
+	b.Run("addv-{{.Name}}", func(b *testing.B) {
+		{{.InitV}}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			if i%2 == 0 {
+				v1 = v1.addv(v2)
+			} else {
+				v2 = v2.addv(v1)
+			}
+		}
+	})
+	b.Run("addp-{{.Name}}", func(b *testing.B) {
+		{{.InitP}}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			if i%2 == 0 {
+				v1 = v1.addp(v2)
+			} else {
+				v2 = v2.addp(v1)
+			}
+		}
+	})
+`)) // benchBody renders the addv-<name> and addp-<name> sub-benchmarks; each iteration alternates which of v1/v2 is receiver and argument
+)
+
+type structFields struct { // structFields is the data main passes to structTmpl
+	Name       string // struct type name, built as "s<i>"
+	Properties string // newline-separated field declarations
+	Addv       string // elements of the composite literal returned by addv, one per line
+	Addp       string // single tuple assignment statement used as the addp body
+}
+type benchFields struct { // benchFields is the data main passes to benchBody
+	Name  string // struct type name used in the sub-benchmark names
+	InitV string // statements declaring v1 and v2 as struct values
+	InitP string // statements declaring v1 and v2 as pointers to structs
+}
+
+func main() { // main generates impl_test.go: N struct types with addv/addp methods and one benchmark covering all of them
+	w := new(bytes.Buffer) // accumulates the unformatted generated source
+	w.WriteString(head)
+
+	N := 10 // number of struct types; the i-th type has i+1 float64 fields
+
+	for i := 0; i < N; i++ {
+		var (
+			ps   = []string{} // field declarations
+			adv  = []string{} // addv: per-field sums for the returned literal
+			adpl = []string{} // addp: assignment targets (left-hand side)
+			adpr = []string{} // addp: per-field sums (right-hand side)
+		)
+		for j := 0; j <= i; j++ {
+			ps = append(ps, fmt.Sprintf("x%d\tfloat64", j))
+			adv = append(adv, fmt.Sprintf("s.x%d + ss.x%d,", j, j))
+			adpl = append(adpl, fmt.Sprintf("s.x%d", j))
+			adpr = append(adpr, fmt.Sprintf("s.x%d + ss.x%d", j, j))
+		}
+		err := structTmpl.Execute(w, structFields{
+			Name:       fmt.Sprintf("s%d", i),
+			Properties: strings.Join(ps, "\n"),
+			Addv:       strings.Join(adv, "\n"),
+			Addp:       strings.Join(adpl, ",") + " = " + strings.Join(adpr, ","), // one tuple assignment updating every field
+		})
+		if err != nil {
+			panic(err)
+		}
+	}
+
+	w.WriteString(benchHead)
+	for i := 0; i < N; i++ {
+		nums1, nums2 := []string{}, []string{} // initial field values for v1 and v2
+		for j := 0; j <= i; j++ {
+			nums1 = append(nums1, fmt.Sprintf("%d", j))
+			nums2 = append(nums2, fmt.Sprintf("%d", j+i)) // v2's values are v1's shifted by i
+		}
+		numstr1 := strings.Join(nums1, ", ")
+		numstr2 := strings.Join(nums2, ", ")
+
+		err := benchBody.Execute(w, benchFields{
+			Name: fmt.Sprintf("s%d", i),
+			InitV: fmt.Sprintf(`v1 := s%d{%s}
+v2 := s%d{%s}`, i, numstr1, i, numstr2),
+			InitP: fmt.Sprintf(`v1 := &s%d{%s}
+			v2 := &s%d{%s}`, i, numstr1, i, numstr2),
+		})
+		if err != nil {
+			panic(err)
+		}
+	}
+	w.WriteString(benchTail)
+
+	out, err := format.Source(w.Bytes()) // format the assembled source; an error here means the generated code did not parse
+	if err != nil {
+		panic(err)
+	}
+	if err := ioutil.WriteFile("impl_test.go", out, 0660); err != nil {
+		panic(err)
+	}
+}
+```
+
+If we generate our test code and perform the same benchmark procedure again:
+
+```bash
+$ go generate
+$ perflock -governor 80% go test -v -run=none -bench=. -count=10 | tee inline.txt
+$ benchstat inline.txt
+name            time/op
+Vec/addv-s0-16  0.25ns ± 0%
+Vec/addp-s0-16  2.20ns ± 0%
+Vec/addv-s1-16  0.49ns ± 1%
+Vec/addp-s1-16  2.20ns ± 0%
+Vec/addv-s2-16  0.25ns ± 1%
+Vec/addp-s2-16  2.20ns ± 0%
+Vec/addv-s3-16  0.49ns ± 2%
+Vec/addp-s3-16  2.21ns ± 1%
+Vec/addv-s4-16  8.29ns ± 0%
+Vec/addp-s4-16  2.37ns ± 1%
+Vec/addv-s5-16  9.06ns ± 1%
+Vec/addp-s5-16  2.74ns ± 1%
+Vec/addv-s6-16   9.9ns ± 0%
+Vec/addp-s6-16  3.17ns ± 0%
+Vec/addv-s7-16  10.9ns ± 1%
+Vec/addp-s7-16  3.27ns ± 1%
+Vec/addv-s8-16  11.4ns ± 0%
+Vec/addp-s8-16  3.29ns ± 0%
+Vec/addv-s9-16  13.4ns ± 1%
+Vec/addp-s9-16  3.37ns ± 0%
+```
+
+We could go even further and try a version that disables inlining:
+
+```diff
+	structTmpl = template.Must(template.New("ss").Parse(`
+type {{.Name}} struct {
+	{{.Properties}}
+}
++//go:noinline
+func (s {{.Name}}) addv(ss {{.Name}}) {{.Name}} {
+	return {{.Name}}{
+		{{.Addv}}
+	}
+}
++//go:noinline
+func (s *{{.Name}}) addp(ss *{{.Name}}) *{{.Name}} {
+	{{.Addp}}
+	return s
+}
+`))
+```
+
+Eventually, we will end up with the following results:
+
+![](./pointer-params/vis.png)
+
+TLDR: The above figure basically demonstrates when you should pass by value
+or pass by pointer. If you are certain that your code won't produce any escaping
+variables, and the size of your argument is at most 4*8 = 32 bytes (four float64
+fields), then you should go for pass-by-value; otherwise, you should keep using pointers.
+
 ## Further Reading Suggestions
 
 - Changkun Ou. Conduct Reliable Benchmarking in Go. March 26, 2020. https://golang.design/s/gobench

diff --git a/pointer-params/fields/Makefile b/pointer-params/fields/Makefile
@@ -0,0 +1,4 @@
+all: # regenerate the test file, run the benchmarks 10 times under perflock, then summarize with benchstat
+	go generate
+	perflock -governor 80% go test -v -run=none -bench=. -count=10 | tee inline.txt
+	benchstat inline.txt
diff --git a/pointer-params/fields/benchstat.txt b/pointer-params/fields/benchstat.txt
@@ -0,0 +1,21 @@
+Vec/addv-s0-16  3.64ns ± 1%  0.25ns ± 0%  -93.24%  (p=0.000 n=10+9)
+Vec/addv-s1-16  3.82ns ± 0%  0.49ns ± 1%  -87.10%  (p=0.000 n=10+8)
+Vec/addv-s2-16  4.29ns ± 0%  0.25ns ± 1%  -94.28%  (p=0.000 n=9+10)
+Vec/addv-s3-16  5.00ns ± 1%  0.49ns ± 2%  -90.12%  (p=0.000 n=10+10)
+Vec/addv-s4-16  10.5ns ± 0%   8.3ns ± 0%  -21.08%  (p=0.000 n=10+8)
+Vec/addv-s5-16  11.2ns ± 0%   9.1ns ± 1%  -19.11%  (p=0.000 n=10+10)
+Vec/addv-s6-16  12.6ns ± 0%   9.9ns ± 0%  -21.03%  (p=0.000 n=8+9)
+Vec/addv-s7-16  12.6ns ± 1%  10.9ns ± 1%  -12.97%  (p=0.000 n=10+10)
+Vec/addv-s8-16  13.8ns ± 1%  11.4ns ± 0%  -17.15%  (p=0.000 n=10+8)
+Vec/addv-s9-16  16.6ns ± 0%  13.4ns ± 1%  -19.04%  (p=0.000 n=8+10)
+
+Vec/addp-s0-16  2.23ns ± 1%  2.20ns ± 0%   -1.52%  (p=0.000 n=10+9)
+Vec/addp-s1-16  2.61ns ± 1%  2.20ns ± 0%  -15.58%  (p=0.000 n=10+8)
+Vec/addp-s2-16  2.95ns ± 1%  2.20ns ± 0%  -25.40%  (p=0.000 n=10+10)
+Vec/addp-s3-16  3.34ns ± 0%  2.21ns ± 1%  -34.03%  (p=0.000 n=9+10)
+Vec/addp-s4-16  3.78ns ± 0%  2.37ns ± 1%  -37.21%  (p=0.000 n=8+9)
+Vec/addp-s5-16  4.31ns ± 1%  2.74ns ± 1%  -36.36%  (p=0.000 n=10+10)
+Vec/addp-s6-16  4.76ns ± 0%  3.17ns ± 0%  -33.47%  (p=0.000 n=9+10)
+Vec/addp-s7-16  5.29ns ± 1%  3.27ns ± 1%  -38.11%  (p=0.000 n=10+10)
+Vec/addp-s8-16  5.69ns ± 1%  3.29ns ± 0%  -42.14%  (p=0.000 n=9+10)
+Vec/addp-s9-16  6.67ns ± 1%  3.37ns ± 0%  -49.47%  (p=0.000 n=10+10)
diff --git a/pointer-params/fields/fields.go b/pointer-params/fields/fields.go
@@ -0,0 +1,3 @@
+//go:generate go run gen.go
+
+package fields
diff --git a/pointer-params/fields/gen.go b/pointer-params/fields/gen.go
@@ -0,0 +1,136 @@
+// +build ignore
+
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"go/format"
+	"io/ioutil"
+	"strings"
+	"text/template"
+)
+
+var ( // Text fragments and templates from which main assembles the generated test file.
+	head = `// Code generated by go run gen.go; DO NOT EDIT.
+package fields_test
+
+import "testing"
+` // head is written first: generated-code notice, package clause and the testing import
+	structTmpl = template.Must(template.New("ss").Parse(`
+type {{.Name}} struct {
+	{{.Properties}}
+}
+
+func (s {{.Name}}) addv(ss {{.Name}}) {{.Name}} {
+	return {{.Name}}{
+		{{.Addv}}
+	}
+}
+
+func (s *{{.Name}}) addp(ss *{{.Name}}) *{{.Name}} {
+	{{.Addp}}
+	return s
+}
+`)) // structTmpl renders one struct type plus its value-receiver addv and pointer-receiver addp methods
+	benchHead = `func BenchmarkVec(b *testing.B) {` // opens the single benchmark function that holds every sub-benchmark
+	benchTail = `}` // closes BenchmarkVec
+	benchBody = template.Must(template.New("bench").Parse(`
+	b.Run("addv-{{.Name}}", func(b *testing.B) {
+		{{.InitV}}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			if i%2 == 0 {
+				v1 = v1.addv(v2)
+			} else {
+				v2 = v2.addv(v1)
+			}
+		}
+	})
+	b.Run("addp-{{.Name}}", func(b *testing.B) {
+		{{.InitP}}
+		b.ResetTimer()
+		for i := 0; i < b.N; i++ {
+			if i%2 == 0 {
+				v1 = v1.addp(v2)
+			} else {
+				v2 = v2.addp(v1)
+			}
+		}
+	})
+`)) // benchBody renders the addv-<name> and addp-<name> sub-benchmarks; each iteration alternates which of v1/v2 is receiver and argument
+)
+
+type structFields struct { // structFields is the data main passes to structTmpl
+	Name       string // struct type name, built as "s<i>"
+	Properties string // newline-separated field declarations
+	Addv       string // elements of the composite literal returned by addv, one per line
+	Addp       string // single tuple assignment statement used as the addp body
+}
+type benchFields struct { // benchFields is the data main passes to benchBody
+	Name  string // struct type name used in the sub-benchmark names
+	InitV string // statements declaring v1 and v2 as struct values
+	InitP string // statements declaring v1 and v2 as pointers to structs
+}
+
+func main() { // main generates impl_test.go: N struct types with addv/addp methods and one benchmark covering all of them
+	w := new(bytes.Buffer) // accumulates the unformatted generated source
+	w.WriteString(head)
+
+	N := 10 // number of struct types; the i-th type has i+1 float64 fields
+
+	for i := 0; i < N; i++ {
+		var (
+			ps   = []string{} // field declarations
+			adv  = []string{} // addv: per-field sums for the returned literal
+			adpl = []string{} // addp: assignment targets (left-hand side)
+			adpr = []string{} // addp: per-field sums (right-hand side)
+		)
+		for j := 0; j <= i; j++ {
+			ps = append(ps, fmt.Sprintf("x%d\tfloat64", j))
+			adv = append(adv, fmt.Sprintf("s.x%d + ss.x%d,", j, j))
+			adpl = append(adpl, fmt.Sprintf("s.x%d", j))
+			adpr = append(adpr, fmt.Sprintf("s.x%d + ss.x%d", j, j))
+		}
+		err := structTmpl.Execute(w, structFields{
+			Name:       fmt.Sprintf("s%d", i),
+			Properties: strings.Join(ps, "\n"),
+			Addv:       strings.Join(adv, "\n"),
+			Addp:       strings.Join(adpl, ",") + " = " + strings.Join(adpr, ","), // one tuple assignment updating every field
+		})
+		if err != nil {
+			panic(err)
+		}
+	}
+
+	w.WriteString(benchHead)
+	for i := 0; i < N; i++ {
+		nums1, nums2 := []string{}, []string{} // initial field values for v1 and v2
+		for j := 0; j <= i; j++ {
+			nums1 = append(nums1, fmt.Sprintf("%d", j))
+			nums2 = append(nums2, fmt.Sprintf("%d", j+i)) // v2's values are v1's shifted by i
+		}
+		numstr1 := strings.Join(nums1, ", ")
+		numstr2 := strings.Join(nums2, ", ")
+
+		err := benchBody.Execute(w, benchFields{
+			Name: fmt.Sprintf("s%d", i),
+			InitV: fmt.Sprintf(`v1 := s%d{%s}
+v2 := s%d{%s}`, i, numstr1, i, numstr2),
+			InitP: fmt.Sprintf(`v1 := &s%d{%s}
+			v2 := &s%d{%s}`, i, numstr1, i, numstr2),
+		})
+		if err != nil {
+			panic(err)
+		}
+	}
+	w.WriteString(benchTail)
+
+	out, err := format.Source(w.Bytes()) // format the assembled source; an error here means the generated code did not parse
+	if err != nil {
+		panic(err)
+	}
+	if err := ioutil.WriteFile("impl_test.go", out, 0660); err != nil {
+		panic(err)
+	}
+}