昨天晚上在地铁上,看了一篇公众号推送的文章—— 深度 | 从Go高性能日志库zap看如何实现高性能Go组件。
文章里提到了日志库使用fmt.Sprintf
带来的性能问题。我之前也写过一个基于fmt.Sprintf
的日志小轮子,当时也没有太关注这个方法的具体实现,今天有空,就稍微深入的研究一下源码吧。
我使用的go版本
go version go1.10.3 windows/amd64
一个简单的例子
package main
import (
"os"
"log"
"runtime/pprof"
"time"
"fmt"
)
type point struct {
x int
y int
}
func main() {
f, err := os.Create("sprintf_cpu.prof")
if err != nil {
log.Fatal("could not create CPU profile: ", err)
}
if err := pprof.StartCPUProfile(f); err != nil {
log.Fatal("could not start CPU profile: ", err)
}
defer pprof.StopCPUProfile()
n := 10000000
startT := time.Now()
p := point{1, 2}
for i := 0; i < n; i++ {
//fmt.Sprintf("Number: %d", i)
fmt.Sprintf("Point: %v", p)
}
t := time.Now().Sub(startT).Nanoseconds()
fmt.Printf("Running in %d nanoseconds \n", t)
fmt.Printf("%d nanoseconds per operation \n", t/int64(n))
}
执行结果:
Running in 2392430400 nanoseconds
239 nanoseconds per operation
来看一下pprof:
go.exe tool pprof .\main.exe .\sprintf_cpu.prof
(pprof) top
Showing nodes accounting for 1410ms, 58.75% of 2400ms total
Dropped 21 nodes (cum <= 12ms)
Showing top 10 nodes out of 41
flat flat% sum% cum cum%
310ms 12.92% 12.92% 1030ms 42.92% fmt.(*pp).printValue
290ms 12.08% 25.00% 310ms 12.92% runtime.mallocgc
160ms 6.67% 31.67% 250ms 10.42% fmt.(*fmt).fmt_integer
140ms 5.83% 37.50% 1540ms 64.17% fmt.(*pp).doPrintf
100ms 4.17% 41.67% 100ms 4.17% reflect.Value.Field
90ms 3.75% 45.42% 90ms 3.75% runtime.(*itabTableType).find
80ms 3.33% 48.75% 280ms 11.67% fmt.(*pp).handleMethods
80ms 3.33% 52.08% 1400ms 58.33% fmt.(*pp).printArg
80ms 3.33% 55.42% 180ms 7.50% fmt.getField
80ms 3.33% 58.75% 80ms 3.33% runtime.memmove
Sprintf
Sprintf函数的实现非常简单 一共只有7行代码:
func Sprintf(format string, a ...interface{}) string {
p := newPrinter() // [1]
p.doPrintf(format, a) // [2]
s := string(p.buf) // [3]
p.free() // [4]
return s
}
[1] 调用newPrinter()
从对象池中获取一个pp
对象,如果没有就创建一个新的。pp
用来保存打印时的状态信息。
// newPrinter allocates a new pp struct or grabs a cached one.
func newPrinter() *pp {
p := ppFree.Get().(*pp)
p.panicking = false
p.erroring = false
p.fmt.init(&p.buf)
return p
}
[2] doPrintf
是核心的打印方法,后面具体分析。
[3] 把缓冲在p.buf
中的内容转换成string。
[4] 重置pp
中的状态,并把pp
放入ppFree
缓冲池。
// free saves used pp structs in ppFree; avoids an allocation per invocation.
func (p *pp) free() {
p.buf = p.buf[:0]
p.arg = nil
p.value = reflect.Value{}
ppFree.Put(p)
}
doPrintf
为了方便阅读,我加了一些中文注释
func (p *pp) doPrintf(format string, a []interface{}) {
end := len(format)
argNum := 0 // we process one argument per non-trivial format
afterIndex := false // previous item in format was an index like [3].
p.reordered = false
formatLoop:
for i := 0; i < end; {
p.goodArgNum = true
lasti := i
for i < end && format[i] != '%' {
i++ // 一直累加 直到遇到 ‘%’ 或者到达format的末尾
}
if i > lasti {
p.buf.WriteString(format[lasti:i]) // 输出普通字符
}
if i >= end {
// done processing format string
break
}
// Process one verb
i++ // 将索引后移 比如遇到了%d 那么i++之后就会指向d
// Do we have flags?
p.fmt.clearflags() // 确保fmtFlags被清空 可能并不需要这么做 但是加上比较稳妥
simpleFormat:
for ; i < end; i++ {
c := format[i]
// 记录 '#' '0' '+' '-' ' ' 标记 在打印不同类型的地方 会根据标记 有不同的处理
switch c {
case '#':
p.fmt.sharp = true
case '0':
p.fmt.zero = !p.fmt.minus // Only allow zero padding to the left.
case '+':
p.fmt.plus = true
case '-':
p.fmt.minus = true
p.fmt.zero = false // Do not pad with zeros to the right.
case ' ':
p.fmt.space = true
default:
// Fast path for common case of ascii lower case simple verbs
// without precision or width or argument indices.
if 'a' <= c && c <= 'z' && argNum < len(a) {
// %v 打印值的默认格式
if c == 'v' {
// %+v 如果是结构体 打印字段名
// Go syntax
p.fmt.sharpV = p.fmt.sharp
p.fmt.sharp = false
// %#v 用go的语法格式打印
// Struct-field syntax
p.fmt.plusV = p.fmt.plus
p.fmt.plus = false
}
// 打印参数
p.printArg(a[argNum], rune(c))
argNum++
i++
continue formatLoop
}
// Format is more complex than simple flags and a verb or is malformed.
break simpleFormat
}
}
// 处理使用[] 指定参数值索引的情况
// Do we have an explicit argument index?
argNum, i, afterIndex = p.argNumber(argNum, format, i, len(a))
// ‘*’ 通过参数指定宽度或精度
// Do we have width?
if i < end && format[i] == '*' {
i++
p.fmt.wid, p.fmt.widPresent, argNum = intFromArg(a, argNum)
if !p.fmt.widPresent {
p.buf.WriteString(badWidthString)
}
// We have a negative width, so take its value and ensure
// that the minus flag is set
if p.fmt.wid < 0 {
p.fmt.wid = -p.fmt.wid
p.fmt.minus = true
p.fmt.zero = false // Do not pad with zeros to the right.
}
afterIndex = false
} else {
// 使用数字指定宽度
p.fmt.wid, p.fmt.widPresent, i = parsenum(format, i, end)
if afterIndex && p.fmt.widPresent { // "%[3]2d"
p.goodArgNum = false
}
}
// 处理精度
// Do we have precision?
if i+1 < end && format[i] == '.' {
i++
if afterIndex { // "%[3].2d"
p.goodArgNum = false
}
argNum, i, afterIndex = p.argNumber(argNum, format, i, len(a))
if i < end && format[i] == '*' {
i++
p.fmt.prec, p.fmt.precPresent, argNum = intFromArg(a, argNum)
// Negative precision arguments don't make sense
if p.fmt.prec < 0 {
p.fmt.prec = 0
p.fmt.precPresent = false
}
if !p.fmt.precPresent {
p.buf.WriteString(badPrecString)
}
afterIndex = false
} else {
p.fmt.prec, p.fmt.precPresent, i = parsenum(format, i, end)
if !p.fmt.precPresent {
p.fmt.prec = 0
p.fmt.precPresent = true
}
}
}
if !afterIndex {
argNum, i, afterIndex = p.argNumber(argNum, format, i, len(a))
}
if i >= end {
p.buf.WriteString(noVerbString)
break
}
verb, size := rune(format[i]), 1
if verb >= utf8.RuneSelf {
verb, size = utf8.DecodeRuneInString(format[i:])
}
i += size
switch {
case verb == '%': // Percent does not absorb operands and ignores f.wid and f.prec.
p.buf.WriteByte('%') // 使用 ‘%%’ 来打印 ‘%’
case !p.goodArgNum:
p.badArgNum(verb)
case argNum >= len(a): // No argument left over to print for the current verb.
p.missingArg(verb) // 参数数量不足
case verb == 'v':
// Go syntax
p.fmt.sharpV = p.fmt.sharp
p.fmt.sharp = false
// Struct-field syntax
p.fmt.plusV = p.fmt.plus
p.fmt.plus = false
fallthrough
default:
p.printArg(a[argNum], verb)
argNum++
}
}
// 处理参数多于占位符的情况
// Check for extra arguments unless the call accessed the arguments
// out of order, in which case it's too expensive to detect if they've all
// been used and arguably OK if they're not.
if !p.reordered && argNum < len(a) {
p.fmt.clearflags()
p.buf.WriteString(extraString)
for i, arg := range a[argNum:] {
if i > 0 {
p.buf.WriteString(commaSpaceString)
}
if arg == nil {
p.buf.WriteString(nilAngleString)
} else {
p.buf.WriteString(reflect.TypeOf(arg).String())
p.buf.WriteByte('=')
p.printArg(arg, 'v')
}
}
p.buf.WriteByte(')')
}
}
printArg
printArg接收两个参数
arg:参数值
verb:占位符类型
方法中通过switch参数的type,分别处理不同的参数类型。
func (p *pp) printArg(arg interface{}, verb rune) {
p.arg = arg
p.value = reflect.Value{}
if arg == nil {
switch verb {
case 'T', 'v':
p.fmt.padString(nilAngleString)
default:
p.badVerb(verb)
}
return
}
// Special processing considerations.
// %T (the value's type) and %p (its address) are special; we always do them first.
switch verb {
case 'T': // 打印类型
p.fmt.fmt_s(reflect.TypeOf(arg).String())
return
case 'p': // 打印指针
p.fmtPointer(reflect.ValueOf(arg), 'p')
return
}
// Some types can be done without reflection.
switch f := arg.(type) {
case bool:
p.fmtBool(f, verb)
case float32:
p.fmtFloat(float64(f), 32, verb)
case float64:
p.fmtFloat(f, 64, verb)
case complex64:
p.fmtComplex(complex128(f), 64, verb)
case complex128:
p.fmtComplex(f, 128, verb)
case int:
p.fmtInteger(uint64(f), signed, verb)
case int8:
p.fmtInteger(uint64(f), signed, verb)
case int16:
p.fmtInteger(uint64(f), signed, verb)
case int32:
p.fmtInteger(uint64(f), signed, verb)
case int64:
p.fmtInteger(uint64(f), signed, verb)
case uint:
p.fmtInteger(uint64(f), unsigned, verb)
case uint8:
p.fmtInteger(uint64(f), unsigned, verb)
case uint16:
p.fmtInteger(uint64(f), unsigned, verb)
case uint32:
p.fmtInteger(uint64(f), unsigned, verb)
case uint64:
p.fmtInteger(f, unsigned, verb)
case uintptr:
p.fmtInteger(uint64(f), unsigned, verb)
case string:
p.fmtString(f, verb)
case []byte:
p.fmtBytes(f, verb, "[]byte")
case reflect.Value:
// Handle extractable values with special methods
// since printValue does not handle them at depth 0.
if f.IsValid() && f.CanInterface() {
p.arg = f.Interface()
if p.handleMethods(verb) {
return
}
}
p.printValue(f, verb, 0)
default:
// If the type is not simple, it might have methods.
if !p.handleMethods(verb) {
// Need to use reflection, since the type had no
// interface methods that could be used for formatting.
p.printValue(reflect.ValueOf(f), verb, 0)
}
}
}
handleMethods
func (p *pp) handleMethods(verb rune) (handled bool) {
if p.erroring {
return
}
// 参数类实现了Formatter接口 则调用Format方法
// Is it a Formatter?
if formatter, ok := p.arg.(Formatter); ok {
handled = true
defer p.catchPanic(p.arg, verb)
formatter.Format(p, verb)
return
}
// If we're doing Go syntax and the argument knows how to supply it, take care of it now.
if p.fmt.sharpV { // 如果使用%#v占位符
// 参数类实现了GoStringer接口 则调用GoString方法
if stringer, ok := p.arg.(GoStringer); ok {
handled = true
defer p.catchPanic(p.arg, verb)
// Print the result of GoString unadorned.
p.fmt.fmt_s(stringer.GoString())
return
}
} else {
// If a string is acceptable according to the format, see if
// the value satisfies one of the string-valued interfaces.
// Println etc. set verb to %v, which is "stringable".
switch verb {
case 'v', 's', 'x', 'X', 'q':
// error 和 Stringer 按照字符串处理
// Is it an error or Stringer?
// The duplication in the bodies is necessary:
// setting handled and deferring catchPanic
// must happen before calling the method.
switch v := p.arg.(type) {
case error:
handled = true
defer p.catchPanic(p.arg, verb)
p.fmtString(v.Error(), verb)
return
case Stringer:
handled = true
defer p.catchPanic(p.arg, verb)
p.fmtString(v.String(), verb)
return
}
}
}
// 返回true 表示参数已经被处理
return false
}
fmtInteger
printArg 方法中根据参数类型会有不用的处理方法,这里以fmtInteger
为例。
fmtInteger
中会把占位符转换成对应的进制,作为参数调用fmt_integer
。
// fmtInteger formats a signed or unsigned integer.
func (p *pp) fmtInteger(v uint64, isSigned bool, verb rune) {
switch verb {
case 'v':
if p.fmt.sharpV && !isSigned {
p.fmt0x64(v, true)
} else {
p.fmt.fmt_integer(v, 10, isSigned, ldigits)
}
case 'd':
p.fmt.fmt_integer(v, 10, isSigned, ldigits)
case 'b':
p.fmt.fmt_integer(v, 2, isSigned, ldigits)
case 'o':
p.fmt.fmt_integer(v, 8, isSigned, ldigits)
case 'x':
p.fmt.fmt_integer(v, 16, isSigned, ldigits)
case 'X':
p.fmt.fmt_integer(v, 16, isSigned, udigits)
case 'c':
p.fmt.fmt_c(v)
case 'q':
if v <= utf8.MaxRune {
p.fmt.fmt_qc(v)
} else {
p.badVerb(verb)
}
case 'U':
p.fmt.fmt_unicode(v)
default:
p.badVerb(verb)
}
}
fmt_integer
fmt_integer
中根据进制将数字转换成了字符串格式。(有点像OJ里面的热身题…)
最后调用pad
写到缓冲中。
// fmt_integer formats signed and unsigned integers.
func (f *fmt) fmt_integer(u uint64, base int, isSigned bool, digits string) {
negative := isSigned && int64(u) < 0
if negative {
u = -u
}
buf := f.intbuf[0:]
// The already allocated f.intbuf with a capacity of 68 bytes
// is large enough for integer formatting when no precision or width is set.
if f.widPresent || f.precPresent {
// Account 3 extra bytes for possible addition of a sign and "0x".
width := 3 + f.wid + f.prec // wid and prec are always positive.
if width > len(buf) {
// We're going to need a bigger boat.
buf = make([]byte, width)
}
}
// Two ways to ask for extra leading zero digits: %.3d or %03d.
// If both are specified the f.zero flag is ignored and
// padding with spaces is used instead.
prec := 0
if f.precPresent {
prec = f.prec
// Precision of 0 and value of 0 means "print nothing" but padding.
if prec == 0 && u == 0 {
oldZero := f.zero
f.zero = false
f.writePadding(f.wid)
f.zero = oldZero
return
}
} else if f.zero && f.widPresent {
prec = f.wid
if negative || f.plus || f.space {
prec-- // leave room for sign
}
}
// Because printing is easier right-to-left: format u into buf, ending at buf[i].
// We could make things marginally faster by splitting the 32-bit case out
// into a separate block but it's not worth the duplication, so u has 64 bits.
i := len(buf)
// Use constants for the division and modulo for more efficient code.
// Switch cases ordered by popularity.
switch base {
case 10:
for u >= 10 {
i--
next := u / 10
buf[i] = byte('0' + u - next*10)
u = next
}
case 16:
for u >= 16 {
i--
buf[i] = digits[u&0xF]
u >>= 4
}
case 8:
for u >= 8 {
i--
buf[i] = byte('0' + u&7)
u >>= 3
}
case 2:
for u >= 2 {
i--
buf[i] = byte('0' + u&1)
u >>= 1
}
default:
panic("fmt: unknown base; can't happen")
}
i--
buf[i] = digits[u]
for i > 0 && prec > len(buf)-i {
i--
buf[i] = '0'
}
// Various prefixes: 0x, -, etc.
if f.sharp {
switch base {
case 8:
if buf[i] != '0' {
i--
buf[i] = '0'
}
case 16:
// Add a leading 0x or 0X.
i--
buf[i] = digits[16]
i--
buf[i] = '0'
}
}
if negative {
i--
buf[i] = '-'
} else if f.plus {
i--
buf[i] = '+'
} else if f.space {
i--
buf[i] = ' '
}
// Left padding with zeros has already been handled like precision earlier
// or the f.zero flag is ignored due to an explicitly set precision.
oldZero := f.zero
f.zero = false
f.pad(buf[i:])
f.zero = oldZero
}
printValue
printValue
和printArg
非常相似,区别在于接收的参数类型为reflect.Value
。
对于reflect.Map
、reflect.Struct
、reflect.Interface
类型,会通过递归的方式打印。
// printValue is similar to printArg but starts with a reflect value, not an interface{} value.
// It does not handle 'p' and 'T' verbs because these should have been already handled by printArg.
func (p *pp) printValue(value reflect.Value, verb rune, depth int) {
// Handle values with special methods if not already handled by printArg (depth == 0).
if depth > 0 && value.IsValid() && value.CanInterface() {
p.arg = value.Interface()
if p.handleMethods(verb) {
return
}
}
p.arg = nil
p.value = value
switch f := value; value.Kind() {
case reflect.Invalid:
if depth == 0 {
p.buf.WriteString(invReflectString)
} else {
switch verb {
case 'v':
p.buf.WriteString(nilAngleString)
default:
p.badVerb(verb)
}
}
case reflect.Bool:
p.fmtBool(f.Bool(), verb)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
p.fmtInteger(uint64(f.Int()), signed, verb)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
p.fmtInteger(f.Uint(), unsigned, verb)
case reflect.Float32:
p.fmtFloat(f.Float(), 32, verb)
case reflect.Float64:
p.fmtFloat(f.Float(), 64, verb)
case reflect.Complex64:
p.fmtComplex(f.Complex(), 64, verb)
case reflect.Complex128:
p.fmtComplex(f.Complex(), 128, verb)
case reflect.String:
p.fmtString(f.String(), verb)
case reflect.Map:
if p.fmt.sharpV {
p.buf.WriteString(f.Type().String())
if f.IsNil() {
p.buf.WriteString(nilParenString)
return
}
p.buf.WriteByte('{')
} else {
p.buf.WriteString(mapString)
}
keys := f.MapKeys()
for i, key := range keys {
if i > 0 {
if p.fmt.sharpV {
p.buf.WriteString(commaSpaceString)
} else {
p.buf.WriteByte(' ')
}
}
p.printValue(key, verb, depth+1)
p.buf.WriteByte(':')
p.printValue(f.MapIndex(key), verb, depth+1)
}
if p.fmt.sharpV {
p.buf.WriteByte('}')
} else {
p.buf.WriteByte(']')
}
case reflect.Struct:
if p.fmt.sharpV {
p.buf.WriteString(f.Type().String())
}
p.buf.WriteByte('{')
for i := 0; i < f.NumField(); i++ {
if i > 0 {
if p.fmt.sharpV {
p.buf.WriteString(commaSpaceString)
} else {
p.buf.WriteByte(' ')
}
}
if p.fmt.plusV || p.fmt.sharpV {
if name := f.Type().Field(i).Name; name != "" {
p.buf.WriteString(name)
p.buf.WriteByte(':')
}
}
p.printValue(getField(f, i), verb, depth+1)
}
p.buf.WriteByte('}')
case reflect.Interface:
value := f.Elem()
if !value.IsValid() {
if p.fmt.sharpV {
p.buf.WriteString(f.Type().String())
p.buf.WriteString(nilParenString)
} else {
p.buf.WriteString(nilAngleString)
}
} else {
p.printValue(value, verb, depth+1)
}
case reflect.Array, reflect.Slice:
switch verb {
case 's', 'q', 'x', 'X':
// Handle byte and uint8 slices and arrays special for the above verbs.
t := f.Type()
if t.Elem().Kind() == reflect.Uint8 {
var bytes []byte
if f.Kind() == reflect.Slice {
bytes = f.Bytes()
} else if f.CanAddr() {
bytes = f.Slice(0, f.Len()).Bytes()
} else {
// We have an array, but we cannot Slice() a non-addressable array,
// so we build a slice by hand. This is a rare case but it would be nice
// if reflection could help a little more.
bytes = make([]byte, f.Len())
for i := range bytes {
bytes[i] = byte(f.Index(i).Uint())
}
}
p.fmtBytes(bytes, verb, t.String())
return
}
}
if p.fmt.sharpV {
p.buf.WriteString(f.Type().String())
if f.Kind() == reflect.Slice && f.IsNil() {
p.buf.WriteString(nilParenString)
return
}
p.buf.WriteByte('{')
for i := 0; i < f.Len(); i++ {
if i > 0 {
p.buf.WriteString(commaSpaceString)
}
p.printValue(f.Index(i), verb, depth+1)
}
p.buf.WriteByte('}')
} else {
p.buf.WriteByte('[')
for i := 0; i < f.Len(); i++ {
if i > 0 {
p.buf.WriteByte(' ')
}
p.printValue(f.Index(i), verb, depth+1)
}
p.buf.WriteByte(']')
}
case reflect.Ptr:
// pointer to array or slice or struct? ok at top level
// but not embedded (avoid loops)
if depth == 0 && f.Pointer() != 0 {
switch a := f.Elem(); a.Kind() {
case reflect.Array, reflect.Slice, reflect.Struct, reflect.Map:
p.buf.WriteByte('&')
p.printValue(a, verb, depth+1)
return
}
}
fallthrough
case reflect.Chan, reflect.Func, reflect.UnsafePointer:
p.fmtPointer(f, verb)
default:
p.unknownType(f)
}
}
总结
fmt.Sprintf
因为反射而损失了部分性能,因此在性能关键的地方应该减少应用。
根据benchmark的结果,我认为大部分情况下,这样的性能消耗并不会产生很大的影响。可以根据实际情况进行评估。