[openssl-dev] [PATCH] Insert CFI directives in x86_64 SHA1 implementation to enable backtracing

Matt Cross matt.cross at gmail.com
Wed Mar 25 18:56:15 UTC 2015


I am working on an application that computes a lot of SHA1 hashes.  I am
trying to profile it and generate flame graphs (see
http://www.brendangregg.com/flamegraphs.html ), but profiling tools cannot
successfully backtrace while the processor is running the optimized SHA1
code on x86_64.  This patch adds CFI directives when the code is assembled
with the GNU assembler, so that tools that understand DWARF debugging
information can backtrace through these routines.

I don't have a build environment for win64, but I did verify that the perl
code does not generate the CFI directives if we are not generating code for
the GNU assembler (i.e., if $cfi is not set).

    -Matt


commit 9522d706fa58679abd0b6f923aad623fad39abe5
Author: Matt Cross <matt.cross at gmail.com>
Date:   Wed Mar 25 14:15:37 2015 -0400

    Add CFI directives to the x86_64 SHA1 implementation to allow
    DWARF-aware utilities to backtrace through these routines.

diff --git a/crypto/sha/asm/sha1-x86_64.pl b/crypto/sha/asm/sha1-x86_64.pl
index 9bb6b49..9fe7b2b 100755
--- a/crypto/sha/asm/sha1-x86_64.pl
+++ b/crypto/sha/asm/sha1-x86_64.pl
@@ -95,6 +95,7 @@ die "can't locate x86_64-xlate.pl";
 if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
  =~ /GNU assembler version ([2-9]\.[0-9]+)/) {
  $avx = ($1>=2.19) + ($1>=2.22);
+ $cfi = 1
 }

 if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
@@ -247,6 +248,8 @@ $code.=<<___;
 .type sha1_block_data_order,\@function,3
 .align 16
 sha1_block_data_order:
+`".cfi_startproc" if $cfi`
+
  mov OPENSSL_ia32cap_P+0(%rip),%r9d
  mov OPENSSL_ia32cap_P+4(%rip),%r8d
  mov OPENSSL_ia32cap_P+8(%rip),%r10d
@@ -275,17 +278,35 @@ $code.=<<___;
 .align 16
 .Lialu:
  mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
  push %rbx
+# The CFA (Canonical Frame Address) is just above the pushed return address, so RBX was just stored at CFA - 16:
+`".cfi_offset rbx,-16" if $cfi`
  push %rbp
+`".cfi_offset rbp,-24" if $cfi`
  push %r12
+`".cfi_offset r12,-32" if $cfi`
  push %r13
+`".cfi_offset r13,-40" if $cfi`
  push %r14
+`".cfi_offset r14,-48" if $cfi`
  mov %rdi,$ctx # reassigned argument
  sub \$`8+16*4`,%rsp
  mov %rsi,$inp # reassigned argument
  and \$-64,%rsp
  mov %rdx,$num # reassigned argument
  mov %rax,`16*4`(%rsp)
+# This adds a "CFA expression" to say that the CFA is calculated by reading the value at RSP+0x40, and adding 8 to it:
+# DW_CFA_def_cfa_expression    0x0f           : says CFA is calculated by evaluating the following expression
+# BLOCK
+#   length (ULEB128)           0x06           : number of bytes remaining
+#   DW_OP_breg7 0x40           0x77 0xc0 0x00 : read RSP, add 0x40, and push onto stack - note SLEB128 encoding of 0x40
+#                                               requires 2 bytes to avoid sign extension
+#   DW_OP_deref                0x06           : read from addr on top of stack
+#   DW_OP_plus_uconst 0x8      0x23 0x08      : pop top of stack, add 8, push back onto stack
+
+`".cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08" if $cfi`
+
 .Lprologue:

  mov 0($ctx),$A
@@ -319,14 +340,22 @@ $code.=<<___;
  jnz .Lloop

  mov `16*4`(%rsp),%rsi
+`".cfi_def_cfa rsi,8" if $cfi`
  mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
  mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
  mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
  mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
  mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
  lea (%rsi),%rsp
+`".cfi_def_cfa rsp,8" if $cfi`
 .Lepilogue:
  ret
+`".cfi_endproc" if $cfi`
 .size sha1_block_data_order,.-sha1_block_data_order
 ___
 if ($shaext) {{{
@@ -342,6 +371,7 @@ $code.=<<___;
 .align 32
 sha1_block_data_order_shaext:
 _shaext_shortcut:
+`".cfi_startproc" if $cfi`
 ___
 $code.=<<___ if ($win64);
  lea `-8-4*16`(%rsp),%rsp
@@ -440,6 +470,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  ret
+`".cfi_endproc" if $cfi`
 .size sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
 ___
 }}}
@@ -473,12 +504,19 @@ $code.=<<___;
 .align 16
 sha1_block_data_order_ssse3:
 _ssse3_shortcut:
+`".cfi_startproc" if $cfi`
  mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
  push %rbx
+`".cfi_offset rbx,-16" if $cfi`
  push %rbp
+`".cfi_offset rbp,-24" if $cfi`
  push %r12
+`".cfi_offset r12,-32" if $cfi`
  push %r13 # redundant, done to share Win64 SE handler
+`".cfi_offset r13,-40" if $cfi`
  push %r14
+`".cfi_offset r14,-48" if $cfi`
  lea `-64-($win64?6*16:0)`(%rsp),%rsp
 ___
 $code.=<<___ if ($win64);
@@ -492,6 +530,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  mov %rax,%r14 # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
  and \$-64,%rsp
  mov %rdi,$ctx # reassigned argument
  mov %rsi,$inp # reassigned argument
@@ -907,14 +946,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
  mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
  mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
  mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
  mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
  mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
  lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
 .Lepilogue_ssse3:
  ret
+`".cfi_endproc" if $cfi`
 .size sha1_block_data_order_ssse3,.-sha1_block_data_order_ssse3
 ___

@@ -935,12 +982,19 @@ $code.=<<___;
 .align 16
 sha1_block_data_order_avx:
 _avx_shortcut:
+`".cfi_startproc" if $cfi`
  mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
  push %rbx
+`".cfi_offset rbx,-16" if $cfi`
  push %rbp
+`".cfi_offset rbp,-24" if $cfi`
  push %r12
+`".cfi_offset r12,-32" if $cfi`
  push %r13 # redundant, done to share Win64 SE handler
+`".cfi_offset r13,-40" if $cfi`
  push %r14
+`".cfi_offset r14,-48" if $cfi`
  lea `-64-($win64?6*16:0)`(%rsp),%rsp
  vzeroupper
 ___
@@ -955,6 +1009,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  mov %rax,%r14 # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
  and \$-64,%rsp
  mov %rdi,$ctx # reassigned argument
  mov %rsi,$inp # reassigned argument
@@ -1271,14 +1326,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
  mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
  mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
  mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
  mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
  mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
  lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
 .Lepilogue_avx:
  ret
+`".cfi_endproc" if $cfi`
 .size sha1_block_data_order_avx,.-sha1_block_data_order_avx
 ___

@@ -1302,12 +1365,19 @@ $code.=<<___;
 .align 16
 sha1_block_data_order_avx2:
 _avx2_shortcut:
+`".cfi_startproc" if $cfi`
  mov %rsp,%rax
+`".cfi_def_cfa_register rax" if $cfi`
  push %rbx
+`".cfi_offset rbx,-16" if $cfi`
  push %rbp
+`".cfi_offset rbp,-24" if $cfi`
  push %r12
+`".cfi_offset r12,-32" if $cfi`
  push %r13
+`".cfi_offset r13,-40" if $cfi`
  push %r14
+`".cfi_offset r14,-48" if $cfi`
  vzeroupper
 ___
 $code.=<<___ if ($win64);
@@ -1322,6 +1392,7 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  mov %rax,%r14 # original %rsp
+`".cfi_def_cfa_register r14" if $cfi`
  mov %rdi,$ctx # reassigned argument
  mov %rsi,$inp # reassigned argument
  mov %rdx,$num # reassigned argument
@@ -1750,14 +1821,22 @@ $code.=<<___ if ($win64);
 ___
 $code.=<<___;
  lea (%r14),%rsi
+`".cfi_def_cfa_register rsi" if $cfi`
  mov -40(%rsi),%r14
+`".cfi_restore r14" if $cfi`
  mov -32(%rsi),%r13
+`".cfi_restore r13" if $cfi`
  mov -24(%rsi),%r12
+`".cfi_restore r12" if $cfi`
  mov -16(%rsi),%rbp
+`".cfi_restore rbp" if $cfi`
  mov -8(%rsi),%rbx
+`".cfi_restore rbx" if $cfi`
  lea (%rsi),%rsp
+`".cfi_def_cfa_register rsp" if $cfi`
 .Lepilogue_avx2:
  ret
+`".cfi_endproc" if $cfi`
 .size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
 ___
 }
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mta.openssl.org/pipermail/openssl-dev/attachments/20150325/cb09f124/attachment-0001.html>


More information about the openssl-dev mailing list