[openssl-dev] [PATCH] Insert CFI directives in x86_64 SHA1 implementation to enable backtracing
Matt Cross
matt.cross at gmail.com
Wed Mar 25 18:56:15 UTC 2015
I am working on an application that computes a lot of SHA-1 hashes. I am
trying to profile it and generate flame graphs (see
http://www.brendangregg.com/flamegraphs.html ), but profiling tools cannot
successfully backtrace while the processor is running the optimized SHA1
code on x86_64. This patch adds CFI directives when the code is built with
the GNU assembler, so that tools that understand DWARF debugging information
can backtrace through these routines.
I don't have a build environment for win64, but I did verify that the perl
code does not generate the CFI directives when we are not generating code for
the GNU assembler (i.e., when $cfi is not set).
-Matt
commit 9522d706fa58679abd0b6f923aad623fad39abe5
Author: Matt Cross <matt.cross at gmail.com>
Date: Wed Mar 25 14:15:37 2015 -0400
Add CFI directives to the x86_64 SHA1 implementation to allow DWARF
aware utilities to backtrace through these routines.
diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index 9bb6b49..9fe7b2b 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -95,6 +95,7 @@ die "can't locate x86_64-xlate.pl";
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
$avx = ($1>=2.19) + ($1>=2.22);
+ $cfi = 1
}
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
@@ -247,6 +248,8 @@ $code.=<<___;
.type sha1_block_data_order,\@function,3
.align 16
sha1_block_data_order:
+`".cfi_startproc" if $cfi`
+
mov OPENSSL_ia32cap_P+0(%rip),%r9d
mov OPENSSL_ia32cap_P+4(%rip),%r8d
mov OPENSSL_ia32cap_P+8(%rip),%r10d
@@ -275,17 +278,35 @@ $code.=<<___;
.align 16
.Lialu:
mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
push %rbx
+# The CFA (Canonical Frame Address) is just above the pushed return address, so RBX was just stored at CFA - 16:
+`".cfi_offset rbx,-16" if $cfi`
push %rbp
+`".cfi_offset rbp,-24" if $cfi`
push %r12
+`".cfi_offset r12,-32" if $cfi`
push %r13
+`".cfi_offset r13,-40" if $cfi`
push %r14
+`".cfi_offset r14,-48" if $cfi`
mov %rdi,$ctx # reassigned argument
sub \$`8+16*4`,%rsp
mov %rsi,$inp # reassigned argument
and \$-64,%rsp
mov %rdx,$num # reassigned argument
mov %rax,`16*4`(%rsp)
+# This adds a "CFA expression" to say that the CFA is calculated by reading the value at RSP+0x40, and adding 8 to it:
+# DW_CFA_def_cfa_expression 0x0f : says CFA is calculated by evaluating the following expression
+# BLOCK
+# length (ULEB128) 0x06 : number of bytes remaining
+# DW_OP_breg7 0x40 0x77 0xc0 0x00 : read RSP, add 0x40, and push onto stack - note SLEB128 encoding of 0x40 requires 2 bytes to avoid sign extension
+# DW_OP_deref 0x06 : read from addr on top of stack
+# DW_OP_plus_uconst 0x8 0x23 0x08 : pop top of stack, add 8, push back onto stack
+
+`".cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08" if $cfi`
+
.Lprologue:
mov 0($ctx),$A
@@ -319,14 +340,22 @@ $code.=<<___;
jnz .Lloop
mov `16*4`(%rsp),%rsi
+`".cfi_def_cfa rsi,8" if $cfi`
mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
lea (%rsi),%rsp
+`".cfi_def_cfa rsp,8" if $cfi`
.Lepilogue:
ret
+`".cfi_endproc" if $cfi`
.size sha1_block_data_order,.-sha1_block_data_order
___
if ($shaext) {{{
@@ -342,6 +371,7 @@ $code.=<<___;
.align 32
sha1_block_data_order_shaext:
_shaext_shortcut:
+`".cfi_startproc" if $cfi`
___
$code.=<<___ if ($win64);
lea `-8-4*16`(%rsp),%rsp
@@ -440,6 +470,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
ret
+`".cfi_endproc" if $cfi`
.size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
___
}}}
@@ -473,12 +504,19 @@ $code.=<<___;
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
+`".cfi_startproc" if $cfi`
mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
push %rbx
+`".cfi_offset rbx,-16" if $cfi`
push %rbp
+`".cfi_offset rbp,-24" if $cfi`
push %r12
+`".cfi_offset r12,-32" if $cfi`
push %r13 # redundant, done to share Win64 SE handler
+`".cfi_offset r13,-40" if $cfi`
push %r14
+`".cfi_offset r14,-48" if $cfi`
lea `-64-($win64?6*16:0)`(%rsp),%rsp
___
$code.=<<___ if ($win64);
@@ -492,6 +530,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%r14 # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
and \$-64,%rsp
mov %rdi,$ctx # reassigned argument
mov %rsi,$inp # reassigned argument
@@ -907,14 +946,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
.Lepilogue_ssse3:
ret
+`".cfi_endproc" if $cfi`
.size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
___
@@ -935,12 +982,19 @@ $code.=<<___;
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
+`".cfi_startproc" if $cfi`
mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
push %rbx
+`".cfi_offset rbx,-16" if $cfi`
push %rbp
+`".cfi_offset rbp,-24" if $cfi`
push %r12
+`".cfi_offset r12,-32" if $cfi`
push %r13 # redundant, done to share Win64 SE handler
+`".cfi_offset r13,-40" if $cfi`
push %r14
+`".cfi_offset r14,-48" if $cfi`
lea `-64-($win64?6*16:0)`(%rsp),%rsp
vzeroupper
___
@@ -955,6 +1009,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%r14 # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
and \$-64,%rsp
mov %rdi,$ctx # reassigned argument
mov %rsi,$inp # reassigned argument
@@ -1271,14 +1326,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
.Lepilogue_avx:
ret
+`".cfi_endproc" if $cfi`
.size sha1_block_data_order_avx,.-sha1_block_data_order_avx
___
@@ -1302,12 +1365,19 @@ $code.=<<___;
.align 16
sha1_block_data_order_avx2:
_avx2_shortcut:
+`".cfi_startproc" if $cfi`
mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
push %rbx
+`".cfi_offset rbx,-16" if $cfi`
push %rbp
+`".cfi_offset rbp,-24" if $cfi`
push %r12
+`".cfi_offset r12,-32" if $cfi`
push %r13
+`".cfi_offset r13,-40" if $cfi`
push %r14
+`".cfi_offset r14,-48" if $cfi`
vzeroupper
___
$code.=<<___ if ($win64);
@@ -1322,6 +1392,7 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
mov %rax,%r14 # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
mov %rdi,$ctx # reassigned argument
mov %rsi,$inp # reassigned argument
mov %rdx,$num # reassigned argument
@@ -1750,14 +1821,22 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
.Lepilogue_avx2:
ret
+`".cfi_endproc" if $cfi`
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
___
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mta.openssl.org/pipermail/openssl-dev/attachments/20150325/cb09f124/attachment-0001.html>
More information about the openssl-dev
mailing list