Skip to content

Commit

Permalink
article: add more discussion about struct size
Browse files Browse the repository at this point in the history
  • Loading branch information
changkun committed Nov 5, 2020
1 parent 7721772 commit 77a03a0
Show file tree
Hide file tree
Showing 9 changed files with 1,318 additions and 1 deletion.
208 changes: 207 additions & 1 deletion pointer-params.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Author(s): [Changkun Ou](https://changkun.de)

Last updated: 2020-10-27
Last updated: 2020-11-05

## Introduction

Expand Down Expand Up @@ -213,6 +213,212 @@ copies the parameter to different registers (e.g., copy pointers to AX and CX),
then write back when returning. Therefore, with inlining disabled, `addv`
is slower than `addp` because of their different memory access patterns.

## Conclusion

Is pass-by-value always faster than pass-by-pointer? We can run a further test.
But this time, we need to use a generator to generate all the test cases. Here
is how we could do it:

```go
// gen.go

// +build ignore

package main

import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"strings"
"text/template"
)

var (
// head is the fixed preamble written first into the generated file: the
// generated-code marker comment, the package clause, and the testing import.
head = `// Code generated by go run gen.go; DO NOT EDIT.
package fields_test
import "testing"
`
// structTmpl renders one struct type plus two methods on it: addv, which
// takes and returns the struct by value, and addp, which takes a pointer,
// runs the {{.Addp}} statement and returns the receiver. main executes it
// with a structFields value.
structTmpl = template.Must(template.New("ss").Parse(`
type {{.Name}} struct {
{{.Properties}}
}
func (s {{.Name}}) addv(ss {{.Name}}) {{.Name}} {
return {{.Name}}{
{{.Addv}}
}
}
func (s *{{.Name}}) addp(ss *{{.Name}}) *{{.Name}} {
{{.Addp}}
return s
}
`))
// benchHead and benchTail open and close the single BenchmarkVec function
// that wraps every rendered benchBody.
benchHead = `func BenchmarkVec(b *testing.B) {`
benchTail = `}`
// benchBody renders two sub-benchmarks for one struct type, named
// "addv-<Name>" and "addp-<Name>". Each runs its init snippet, resets the
// timer, and then alternates v1.op(v2) / v2.op(v1) on the parity of i.
// main executes it with a benchFields value.
// NOTE(review): v1 and v2 are not read after the loop in the rendered code;
// confirm the compiler cannot eliminate the inlined addv work.
benchBody = template.Must(template.New("bench").Parse(`
b.Run("addv-{{.Name}}", func(b *testing.B) {
{{.InitV}}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if i%2 == 0 {
v1 = v1.addv(v2)
} else {
v2 = v2.addv(v1)
}
}
})
b.Run("addp-{{.Name}}", func(b *testing.B) {
{{.InitP}}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if i%2 == 0 {
v1 = v1.addp(v2)
} else {
v2 = v2.addp(v1)
}
}
})
`))
)

// structFields carries the values substituted into structTmpl for one
// generated struct type.
type structFields struct {
	// Name is the type name, Properties the field declarations, Addv the
	// elements of the composite literal returned by addv, and Addp the
	// assignment statement executed by addp.
	Name, Properties, Addv, Addp string
}
// benchFields carries the values substituted into benchBody for one
// generated struct type.
type benchFields struct {
	// Name is the struct type name used in the sub-benchmark names; InitV
	// and InitP are the statements declaring v1 and v2 for the addv and
	// addp sub-benchmarks respectively.
	Name, InitV, InitP string
}

// main generates impl_test.go, a benchmark file that exercises addv and addp
// on ten struct types s0..s9, where sN has the N+1 float64 fields x0..xN.
//
// The source is assembled in memory from head, structTmpl, benchHead,
// benchBody and benchTail, formatted with format.Source, and written to
// "impl_test.go" relative to the working directory. Any template,
// formatting, or write error panics.
func main() {
	const structCount = 10

	var src bytes.Buffer
	src.WriteString(head)

	// First pass: one type declaration (with addv/addp) per struct size.
	for n := 0; n < structCount; n++ {
		var decls, sums, lhs, rhs []string
		for k := 0; k <= n; k++ {
			decls = append(decls, fmt.Sprintf("x%d\tfloat64", k))
			sums = append(sums, fmt.Sprintf("s.x%d + ss.x%d,", k, k))
			lhs = append(lhs, fmt.Sprintf("s.x%d", k))
			rhs = append(rhs, fmt.Sprintf("s.x%d + ss.x%d", k, k))
		}
		fields := structFields{
			Name:       fmt.Sprintf("s%d", n),
			Properties: strings.Join(decls, "\n"),
			Addv:       strings.Join(sums, "\n"),
			// addp updates every field in one tuple assignment.
			Addp: strings.Join(lhs, ",") + " = " + strings.Join(rhs, ","),
		}
		if err := structTmpl.Execute(&src, fields); err != nil {
			panic(err)
		}
	}

	// Second pass: one addv/addp sub-benchmark pair per struct size, all
	// wrapped in a single benchmark function.
	src.WriteString(benchHead)
	for n := 0; n < structCount; n++ {
		// v1 is initialised with 0..n and v2 with n..2n.
		var first, second []string
		for k := 0; k <= n; k++ {
			first = append(first, fmt.Sprintf("%d", k))
			second = append(second, fmt.Sprintf("%d", k+n))
		}
		v1Vals := strings.Join(first, ", ")
		v2Vals := strings.Join(second, ", ")

		bench := benchFields{
			Name: fmt.Sprintf("s%d", n),
			InitV: fmt.Sprintf(`v1 := s%d{%s}
v2 := s%d{%s}`, n, v1Vals, n, v2Vals),
			InitP: fmt.Sprintf(`v1 := &s%d{%s}
v2 := &s%d{%s}`, n, v1Vals, n, v2Vals),
		}
		if err := benchBody.Execute(&src, bench); err != nil {
			panic(err)
		}
	}
	src.WriteString(benchTail)

	// The templates emit unindented code; format.Source normalises it.
	formatted, err := format.Source(src.Bytes())
	if err != nil {
		panic(err)
	}
	if err = ioutil.WriteFile("impl_test.go", formatted, 0660); err != nil {
		panic(err)
	}
}
```

If we generate our test code and perform the same benchmark procedure again:

```bash
$ go generate
$ perflock -governor 80% go test -v -run=none -bench=. -count=10 | tee inline.txt
$ benchstat inline.txt
name time/op
Vec/addv-s0-16 0.25ns ± 0%
Vec/addp-s0-16 2.20ns ± 0%
Vec/addv-s1-16 0.49ns ± 1%
Vec/addp-s1-16 2.20ns ± 0%
Vec/addv-s2-16 0.25ns ± 1%
Vec/addp-s2-16 2.20ns ± 0%
Vec/addv-s3-16 0.49ns ± 2%
Vec/addp-s3-16 2.21ns ± 1%
Vec/addv-s4-16 8.29ns ± 0%
Vec/addp-s4-16 2.37ns ± 1%
Vec/addv-s5-16 9.06ns ± 1%
Vec/addp-s5-16 2.74ns ± 1%
Vec/addv-s6-16 9.9ns ± 0%
Vec/addp-s6-16 3.17ns ± 0%
Vec/addv-s7-16 10.9ns ± 1%
Vec/addp-s7-16 3.27ns ± 1%
Vec/addv-s8-16 11.4ns ± 0%
Vec/addp-s8-16 3.29ns ± 0%
Vec/addv-s9-16 13.4ns ± 1%
Vec/addp-s9-16 3.37ns ± 0%
```

We could go even further and try a version that disables inlining:

```diff
structTmpl = template.Must(template.New("ss").Parse(`
type {{.Name}} struct {
{{.Properties}}
}
+//go:noinline
func (s {{.Name}}) addv(ss {{.Name}}) {{.Name}} {
return {{.Name}}{
{{.Addv}}
}
}
+//go:noinline
func (s *{{.Name}}) addp(ss *{{.Name}}) *{{.Name}} {
{{.Addp}}
return s
}
`))
```

Eventually, we will end up with the following results:

![](./pointer-params/vis.png)

TLDR: The above figure basically demonstrates when you should pass-by-value
or pass-by-pointer. If you are certain that your code won't produce any escaping
variables, and the size of your argument does not exceed 4*8 = 32 bytes (four `float64` fields),
then you should go for pass-by-value; otherwise, you should keep using pointers.

## Further Reading Suggestions

- Changkun Ou. Conduct Reliable Benchmarking in Go. March 26, 2020. https://golang.design/s/gobench
Expand Down
4 changes: 4 additions & 0 deletions pointer-params/fields/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Regenerate the benchmark source, run the benchmarks ten times under
# perflock, and summarise the results with benchstat.
# all produces no file named "all", so it is declared phony to keep it
# runnable even if such a file appears in the directory.
.PHONY: all
all:
	go generate
	perflock -governor 80% go test -v -run=none -bench=. -count=10 | tee inline.txt
	benchstat inline.txt
21 changes: 21 additions & 0 deletions pointer-params/fields/benchstat.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Vec/addv-s0-16 3.64ns ± 1% 0.25ns ± 0% -93.24% (p=0.000 n=10+9)
Vec/addv-s1-16 3.82ns ± 0% 0.49ns ± 1% -87.10% (p=0.000 n=10+8)
Vec/addv-s2-16 4.29ns ± 0% 0.25ns ± 1% -94.28% (p=0.000 n=9+10)
Vec/addv-s3-16 5.00ns ± 1% 0.49ns ± 2% -90.12% (p=0.000 n=10+10)
Vec/addv-s4-16 10.5ns ± 0% 8.3ns ± 0% -21.08% (p=0.000 n=10+8)
Vec/addv-s5-16 11.2ns ± 0% 9.1ns ± 1% -19.11% (p=0.000 n=10+10)
Vec/addv-s6-16 12.6ns ± 0% 9.9ns ± 0% -21.03% (p=0.000 n=8+9)
Vec/addv-s7-16 12.6ns ± 1% 10.9ns ± 1% -12.97% (p=0.000 n=10+10)
Vec/addv-s8-16 13.8ns ± 1% 11.4ns ± 0% -17.15% (p=0.000 n=10+8)
Vec/addv-s9-16 16.6ns ± 0% 13.4ns ± 1% -19.04% (p=0.000 n=8+10)

Vec/addp-s0-16 2.23ns ± 1% 2.20ns ± 0% -1.52% (p=0.000 n=10+9)
Vec/addp-s1-16 2.61ns ± 1% 2.20ns ± 0% -15.58% (p=0.000 n=10+8)
Vec/addp-s2-16 2.95ns ± 1% 2.20ns ± 0% -25.40% (p=0.000 n=10+10)
Vec/addp-s3-16 3.34ns ± 0% 2.21ns ± 1% -34.03% (p=0.000 n=9+10)
Vec/addp-s4-16 3.78ns ± 0% 2.37ns ± 1% -37.21% (p=0.000 n=8+9)
Vec/addp-s5-16 4.31ns ± 1% 2.74ns ± 1% -36.36% (p=0.000 n=10+10)
Vec/addp-s6-16 4.76ns ± 0% 3.17ns ± 0% -33.47% (p=0.000 n=9+10)
Vec/addp-s7-16 5.29ns ± 1% 3.27ns ± 1% -38.11% (p=0.000 n=10+10)
Vec/addp-s8-16 5.69ns ± 1% 3.29ns ± 0% -42.14% (p=0.000 n=9+10)
Vec/addp-s9-16 6.67ns ± 1% 3.37ns ± 0% -49.47% (p=0.000 n=10+10)
3 changes: 3 additions & 0 deletions pointer-params/fields/fields.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
//go:generate go run gen.go

package fields
136 changes: 136 additions & 0 deletions pointer-params/fields/gen.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
// +build ignore

package main

import (
"bytes"
"fmt"
"go/format"
"io/ioutil"
"strings"
"text/template"
)

var (
// head is the fixed preamble written first into the generated file: the
// generated-code marker comment, the package clause, and the testing import.
head = `// Code generated by go run gen.go; DO NOT EDIT.
package fields_test
import "testing"
`
// structTmpl renders one struct type plus two methods on it: addv, which
// takes and returns the struct by value, and addp, which takes a pointer,
// runs the {{.Addp}} statement and returns the receiver. main executes it
// with a structFields value.
structTmpl = template.Must(template.New("ss").Parse(`
type {{.Name}} struct {
{{.Properties}}
}
func (s {{.Name}}) addv(ss {{.Name}}) {{.Name}} {
return {{.Name}}{
{{.Addv}}
}
}
func (s *{{.Name}}) addp(ss *{{.Name}}) *{{.Name}} {
{{.Addp}}
return s
}
`))
// benchHead and benchTail open and close the single BenchmarkVec function
// that wraps every rendered benchBody.
benchHead = `func BenchmarkVec(b *testing.B) {`
benchTail = `}`
// benchBody renders two sub-benchmarks for one struct type, named
// "addv-<Name>" and "addp-<Name>". Each runs its init snippet, resets the
// timer, and then alternates v1.op(v2) / v2.op(v1) on the parity of i.
// main executes it with a benchFields value.
// NOTE(review): v1 and v2 are not read after the loop in the rendered code;
// confirm the compiler cannot eliminate the inlined addv work.
benchBody = template.Must(template.New("bench").Parse(`
b.Run("addv-{{.Name}}", func(b *testing.B) {
{{.InitV}}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if i%2 == 0 {
v1 = v1.addv(v2)
} else {
v2 = v2.addv(v1)
}
}
})
b.Run("addp-{{.Name}}", func(b *testing.B) {
{{.InitP}}
b.ResetTimer()
for i := 0; i < b.N; i++ {
if i%2 == 0 {
v1 = v1.addp(v2)
} else {
v2 = v2.addp(v1)
}
}
})
`))
)

// structFields carries the values substituted into structTmpl for one
// generated struct type.
type structFields struct {
	// Name is the type name, Properties the field declarations, Addv the
	// elements of the composite literal returned by addv, and Addp the
	// assignment statement executed by addp.
	Name, Properties, Addv, Addp string
}
// benchFields carries the values substituted into benchBody for one
// generated struct type.
type benchFields struct {
	// Name is the struct type name used in the sub-benchmark names; InitV
	// and InitP are the statements declaring v1 and v2 for the addv and
	// addp sub-benchmarks respectively.
	Name, InitV, InitP string
}

// main generates impl_test.go, a benchmark file that exercises addv and addp
// on ten struct types s0..s9, where sN has the N+1 float64 fields x0..xN.
//
// The source is assembled in memory from head, structTmpl, benchHead,
// benchBody and benchTail, formatted with format.Source, and written to
// "impl_test.go" relative to the working directory. Any template,
// formatting, or write error panics.
func main() {
	const structCount = 10

	var src bytes.Buffer
	src.WriteString(head)

	// First pass: one type declaration (with addv/addp) per struct size.
	for n := 0; n < structCount; n++ {
		var decls, sums, lhs, rhs []string
		for k := 0; k <= n; k++ {
			decls = append(decls, fmt.Sprintf("x%d\tfloat64", k))
			sums = append(sums, fmt.Sprintf("s.x%d + ss.x%d,", k, k))
			lhs = append(lhs, fmt.Sprintf("s.x%d", k))
			rhs = append(rhs, fmt.Sprintf("s.x%d + ss.x%d", k, k))
		}
		fields := structFields{
			Name:       fmt.Sprintf("s%d", n),
			Properties: strings.Join(decls, "\n"),
			Addv:       strings.Join(sums, "\n"),
			// addp updates every field in one tuple assignment.
			Addp: strings.Join(lhs, ",") + " = " + strings.Join(rhs, ","),
		}
		if err := structTmpl.Execute(&src, fields); err != nil {
			panic(err)
		}
	}

	// Second pass: one addv/addp sub-benchmark pair per struct size, all
	// wrapped in a single benchmark function.
	src.WriteString(benchHead)
	for n := 0; n < structCount; n++ {
		// v1 is initialised with 0..n and v2 with n..2n.
		var first, second []string
		for k := 0; k <= n; k++ {
			first = append(first, fmt.Sprintf("%d", k))
			second = append(second, fmt.Sprintf("%d", k+n))
		}
		v1Vals := strings.Join(first, ", ")
		v2Vals := strings.Join(second, ", ")

		bench := benchFields{
			Name: fmt.Sprintf("s%d", n),
			InitV: fmt.Sprintf(`v1 := s%d{%s}
v2 := s%d{%s}`, n, v1Vals, n, v2Vals),
			InitP: fmt.Sprintf(`v1 := &s%d{%s}
v2 := &s%d{%s}`, n, v1Vals, n, v2Vals),
		}
		if err := benchBody.Execute(&src, bench); err != nil {
			panic(err)
		}
	}
	src.WriteString(benchTail)

	// The templates emit unindented code; format.Source normalises it.
	formatted, err := format.Source(src.Bytes())
	if err != nil {
		panic(err)
	}
	if err = ioutil.WriteFile("impl_test.go", formatted, 0660); err != nil {
		panic(err)
	}
}
Loading

0 comments on commit 77a03a0

Please sign in to comment.