[openssl] master update

Dr. Paul Dale pauli at openssl.org
Thu Mar 18 04:13:43 UTC 2021


The branch master has been updated
       via  d07d8057991712261323c05bb022d000a01404d0 (commit)
       via  eb27d75788e7d53a2a43aacc25f23c2856b4065d (commit)
       via  ba64e5a92a6f009e311ad1c3565817820a1632a4 (commit)
       via  90165623a50fa30454c4a2f9bece7af2c611b731 (commit)
      from  11c7874d0c055450783252e68d97dc1de7151dc3 (commit)


- Log -----------------------------------------------------------------
commit d07d8057991712261323c05bb022d000a01404d0
Author: Beat Bolli <dev at drbeat.li>
Date:   Sun Feb 14 23:47:57 2021 +0100

    Add tests for the limited Unicode code point range
    
    Signed-off-by: Beat Bolli <dev at drbeat.li>
    
    Reviewed-by: Shane Lontis <shane.lontis at oracle.com>
    Reviewed-by: Paul Dale <pauli at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14185)

commit eb27d75788e7d53a2a43aacc25f23c2856b4065d
Author: Beat Bolli <dev at drbeat.li>
Date:   Sun Feb 14 23:47:15 2021 +0100

    ASN1: check the Unicode code point range in ASN1_mbstring_copy()
    
    Signed-off-by: Beat Bolli <dev at drbeat.li>
    
    Reviewed-by: Shane Lontis <shane.lontis at oracle.com>
    Reviewed-by: Paul Dale <pauli at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14185)

commit ba64e5a92a6f009e311ad1c3565817820a1632a4
Author: Beat Bolli <dev at drbeat.li>
Date:   Sun Feb 14 19:27:56 2021 +0100

    ASN1: limit the Unicode code point range in UTF8_getc() and UTF8_putc()
    
    Since the Unicode 4.0.0 standard, the valid code point range is U+0000
    to U+10FFFF. Make code points outside this range invalid when converting
    from/to UTF-8.
    
    Signed-off-by: Beat Bolli <dev at drbeat.li>
    
    Reviewed-by: Shane Lontis <shane.lontis at oracle.com>
    Reviewed-by: Paul Dale <pauli at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14185)

commit 90165623a50fa30454c4a2f9bece7af2c611b731
Author: Beat Bolli <dev at drbeat.li>
Date:   Tue Feb 16 19:15:45 2021 +0100

    ASN1: add an internal header to validate Unicode ranges
    
    Reviewed-by: Shane Lontis <shane.lontis at oracle.com>
    Reviewed-by: Paul Dale <pauli at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14185)

-----------------------------------------------------------------------

Summary of changes:
 crypto/asn1/a_mbstr.c      | 16 ++++++++--
 crypto/asn1/a_utf8.c       | 75 ++++++++--------------------------------------
 include/internal/unicode.h | 31 +++++++++++++++++++
 test/asn1_internal_test.c  | 41 +++++++++++++++++++++++++
 4 files changed, 97 insertions(+), 66 deletions(-)
 create mode 100644 include/internal/unicode.h

diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c
index 2af2445410..208a383af2 100644
--- a/crypto/asn1/a_mbstr.c
+++ b/crypto/asn1/a_mbstr.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the Apache License 2.0 (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -10,6 +10,7 @@
 #include <stdio.h>
 #include "crypto/ctype.h"
 #include "internal/cryptlib.h"
+#include "internal/unicode.h"
 #include <openssl/asn1.h>
 
 static int traverse_string(const unsigned char *p, int len, int inform,
@@ -242,6 +243,9 @@ static int traverse_string(const unsigned char *p, int len, int inform,
 static int in_utf8(unsigned long value, void *arg)
 {
     int *nchar;
+
+    if (!is_unicode_valid(value))
+        return -2;
     nchar = arg;
     (*nchar)++;
     return 1;
@@ -251,9 +255,13 @@ static int in_utf8(unsigned long value, void *arg)
 
 static int out_utf8(unsigned long value, void *arg)
 {
-    int *outlen;
+    int *outlen, len;
+
+    len = UTF8_putc(NULL, -1, value);
+    if (len <= 0)
+        return len;
     outlen = arg;
-    *outlen += UTF8_putc(NULL, -1, value);
+    *outlen += len;
     return 1;
 }
 
@@ -278,6 +286,8 @@ static int type_str(unsigned long value, void *arg)
         types &= ~B_ASN1_T61STRING;
     if ((types & B_ASN1_BMPSTRING) && (value > 0xffff))
         types &= ~B_ASN1_BMPSTRING;
+    if ((types & B_ASN1_UTF8STRING) && !is_unicode_valid(value))
+        types &= ~B_ASN1_UTF8STRING;
     if (!types)
         return -1;
     *((unsigned long *)arg) = types;
diff --git a/crypto/asn1/a_utf8.c b/crypto/asn1/a_utf8.c
index 143ae01db7..6572726cf1 100644
--- a/crypto/asn1/a_utf8.c
+++ b/crypto/asn1/a_utf8.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 1995-2016 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved.
  *
  * Licensed under the Apache License 2.0 (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
@@ -9,6 +9,7 @@
 
 #include <stdio.h>
 #include "internal/cryptlib.h"
+#include "internal/unicode.h"
 #include <openssl/asn1.h>
 
 /* UTF8 utilities */
@@ -58,6 +59,8 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
         value |= *p++ & 0x3f;
         if (value < 0x800)
             return -4;
+        if (is_unicode_surrogate(value))
+            return -2;
         ret = 3;
     } else if ((*p & 0xf8) == 0xf0) {
         if (len < 4)
@@ -73,40 +76,6 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
         if (value < 0x10000)
             return -4;
         ret = 4;
-    } else if ((*p & 0xfc) == 0xf8) {
-        if (len < 5)
-            return -1;
-        if (((p[1] & 0xc0) != 0x80)
-            || ((p[2] & 0xc0) != 0x80)
-            || ((p[3] & 0xc0) != 0x80)
-            || ((p[4] & 0xc0) != 0x80))
-            return -3;
-        value = ((unsigned long)(*p++ & 0x3)) << 24;
-        value |= ((unsigned long)(*p++ & 0x3f)) << 18;
-        value |= ((unsigned long)(*p++ & 0x3f)) << 12;
-        value |= (*p++ & 0x3f) << 6;
-        value |= *p++ & 0x3f;
-        if (value < 0x200000)
-            return -4;
-        ret = 5;
-    } else if ((*p & 0xfe) == 0xfc) {
-        if (len < 6)
-            return -1;
-        if (((p[1] & 0xc0) != 0x80)
-            || ((p[2] & 0xc0) != 0x80)
-            || ((p[3] & 0xc0) != 0x80)
-            || ((p[4] & 0xc0) != 0x80)
-            || ((p[5] & 0xc0) != 0x80))
-            return -3;
-        value = ((unsigned long)(*p++ & 0x1)) << 30;
-        value |= ((unsigned long)(*p++ & 0x3f)) << 24;
-        value |= ((unsigned long)(*p++ & 0x3f)) << 18;
-        value |= ((unsigned long)(*p++ & 0x3f)) << 12;
-        value |= (*p++ & 0x3f) << 6;
-        value |= *p++ & 0x3f;
-        if (value < 0x4000000)
-            return -4;
-        ret = 6;
     } else
         return -2;
     *val = value;
@@ -116,15 +85,15 @@ int UTF8_getc(const unsigned char *str, int len, unsigned long *val)
 /*
  * This takes a character 'value' and writes the UTF8 encoded value in 'str'
  * where 'str' is a buffer containing 'len' characters. Returns the number of
- * characters written or -1 if 'len' is too small. 'str' can be set to NULL
- * in which case it just returns the number of characters. It will need at
- * most 6 characters.
+ * characters written, -1 if 'len' is too small or -2 if 'value' is out of
+ * range. 'str' can be set to NULL in which case it just returns the number of
+ * characters. It will need at most 4 characters.
  */
 
 int UTF8_putc(unsigned char *str, int len, unsigned long value)
 {
     if (!str)
-        len = 6;                /* Maximum we will need */
+        len = 4;                /* Maximum we will need */
     else if (len <= 0)
         return -1;
     if (value < 0x80) {
@@ -142,6 +111,8 @@ int UTF8_putc(unsigned char *str, int len, unsigned long value)
         return 2;
     }
     if (value < 0x10000) {
+        if (is_unicode_surrogate(value))
+            return -2;
         if (len < 3)
             return -1;
         if (str) {
@@ -151,7 +122,7 @@ int UTF8_putc(unsigned char *str, int len, unsigned long value)
         }
         return 3;
     }
-    if (value < 0x200000) {
+    if (value < UNICODE_LIMIT) {
         if (len < 4)
             return -1;
         if (str) {
@@ -162,27 +133,5 @@ int UTF8_putc(unsigned char *str, int len, unsigned long value)
         }
         return 4;
     }
-    if (value < 0x4000000) {
-        if (len < 5)
-            return -1;
-        if (str) {
-            *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8);
-            *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
-            *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
-            *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
-            *str = (unsigned char)((value & 0x3f) | 0x80);
-        }
-        return 5;
-    }
-    if (len < 6)
-        return -1;
-    if (str) {
-        *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc);
-        *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80);
-        *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
-        *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
-        *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
-        *str = (unsigned char)((value & 0x3f) | 0x80);
-    }
-    return 6;
+    return -2;
 }
diff --git a/include/internal/unicode.h b/include/internal/unicode.h
new file mode 100644
index 0000000000..3dcdcd2c89
--- /dev/null
+++ b/include/internal/unicode.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2021 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#ifndef OSSL_INTERNAL_UNICODE_H
+# define OSSL_INTERNAL_UNICODE_H
+# pragma once
+
+typedef enum {
+    SURROGATE_MIN = 0xd800UL,
+    SURROGATE_MAX = 0xdfffUL,
+    UNICODE_MAX = 0x10ffffUL,
+    UNICODE_LIMIT
+} UNICODE_CONSTANTS;
+
+static ossl_unused inline int is_unicode_surrogate(unsigned long value)
+{
+    return value >= SURROGATE_MIN && value <= SURROGATE_MAX;
+}
+
+static ossl_unused inline int is_unicode_valid(unsigned long value)
+{
+    return value <= UNICODE_MAX && !is_unicode_surrogate(value);
+}
+
+#endif
diff --git a/test/asn1_internal_test.c b/test/asn1_internal_test.c
index e77299a7c8..cf201a5a26 100644
--- a/test/asn1_internal_test.c
+++ b/test/asn1_internal_test.c
@@ -107,9 +107,50 @@ static int test_standard_methods(void)
     return 0;
 }
 
+/**********************************************************************
+ *
+ * Tests of the Unicode code point range
+ *
+ ***/
+
+static int test_unicode(const unsigned char *univ, size_t len, int expected)
+{
+    const unsigned char *end = univ + len;
+    int ok = 1;
+
+    for (; univ < end; univ += 4) {
+        if (!TEST_int_eq(ASN1_mbstring_copy(NULL, univ, 4, MBSTRING_UNIV,
+                                            B_ASN1_UTF8STRING),
+                         expected))
+            ok = 0;
+    }
+    return ok;
+}
+
+static int test_unicode_range(void)
+{
+    const unsigned char univ_ok[] = "\0\0\0\0"
+                                    "\0\0\xd7\xff"
+                                    "\0\0\xe0\x00"
+                                    "\0\x10\xff\xff";
+    const unsigned char univ_bad[] = "\0\0\xd8\x00"
+                                     "\0\0\xdf\xff"
+                                     "\0\x11\x00\x00"
+                                     "\x80\x00\x00\x00"
+                                     "\xff\xff\xff\xff";
+    int ok = 1;
+
+    if (!test_unicode(univ_ok, sizeof univ_ok - 1, V_ASN1_UTF8STRING))
+        ok = 0;
+    if (!test_unicode(univ_bad, sizeof univ_bad - 1, -1))
+        ok = 0;
+    return ok;
+}
+
 int setup_tests(void)
 {
     ADD_TEST(test_tbl_standard);
     ADD_TEST(test_standard_methods);
+    ADD_TEST(test_unicode_range);
     return 1;
 }


More information about the openssl-commits mailing list