@@ -24,24 +24,34 @@ static PyTypeObject BytesWriterType;
2424
2525static bool
2626_grow_buffer (BytesWriterObject * data , Py_ssize_t n ) {
27+ if (unlikely (n > PY_SSIZE_T_MAX - data -> len )) {
28+ PyErr_NoMemory ();
29+ return false;
30+ }
2731 Py_ssize_t target = data -> len + n ;
2832 Py_ssize_t size = data -> capacity ;
2933 do {
34+ if (unlikely (size > PY_SSIZE_T_MAX / 2 )) {
35+ PyErr_NoMemory ();
36+ return false;
37+ }
3038 size *= 2 ;
3139 } while (target >= size );
40+ char * new_buf ;
3241 if (data -> buf == data -> data ) {
3342 // Move from embedded buffer to heap-allocated buffer
34- data -> buf = PyMem_Malloc (size );
35- if (data -> buf != NULL ) {
36- memcpy (data -> buf , data -> data , WRITER_EMBEDDED_BUF_LEN );
43+ new_buf = PyMem_Malloc (size );
44+ if (new_buf != NULL ) {
45+ memcpy (new_buf , data -> data , data -> len );
3746 }
3847 } else {
39- data -> buf = PyMem_Realloc (data -> buf , size );
48+ new_buf = PyMem_Realloc (data -> buf , size );
4049 }
41- if (unlikely (data -> buf == NULL )) {
50+ if (unlikely (new_buf == NULL )) {
4251 PyErr_NoMemory ();
4352 return false;
4453 }
54+ data -> buf = new_buf ;
4555 data -> capacity = size ;
4656 return true;
4757}
@@ -71,9 +81,8 @@ BytesWriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
7181 }
7282
7383 BytesWriterObject * self = (BytesWriterObject * )type -> tp_alloc (type , 0 );
74- if (self != NULL ) {
84+ if (self != NULL )
7585 BytesWriter_init_internal (self );
76- }
7786 return (PyObject * )self ;
7887}
7988
@@ -99,6 +108,9 @@ BytesWriter_init(BytesWriterObject *self, PyObject *args, PyObject *kwds)
99108 return -1 ;
100109 }
101110
111+ // Soft reset: free any heap buffer so re-initialization doesn't leak.
112+ if (self -> buf != self -> data && self -> buf != NULL )
113+ PyMem_Free (self -> buf );
102114 BytesWriter_init_internal (self );
103115 return 0 ;
104116}
@@ -396,10 +408,23 @@ grow_string_buffer_helper(StringWriterObject *self, Py_ssize_t target_capacity,
396408 char old_kind = self -> kind ;
397409 Py_ssize_t new_capacity = self -> capacity ;
398410
411+ // Limit so that (new_capacity * 2) * new_kind stays within Py_ssize_t.
412+ // new_kind is always 1, 2, or 4, so use a shift instead of division.
413+ int shift = (new_kind == 4 ) ? 3 : (new_kind == 2 ? 2 : 1 );
414+ Py_ssize_t cap_limit = PY_SSIZE_T_MAX >> shift ;
399415 while (target_capacity >= new_capacity ) {
416+ if (unlikely (new_capacity > cap_limit )) {
417+ PyErr_NoMemory ();
418+ return false;
419+ }
400420 new_capacity *= 2 ;
401421 }
402422
423+ if (unlikely (new_capacity > cap_limit * 2 )) {
424+ PyErr_NoMemory ();
425+ return false;
426+ }
427+
403428 Py_ssize_t size_bytes = new_capacity * new_kind ;
404429 char * new_buf ;
405430 bool from_embedded = (self -> buf == self -> data );
@@ -433,6 +458,10 @@ grow_string_buffer_helper(StringWriterObject *self, Py_ssize_t target_capacity,
433458}
434459
435460static bool grow_string_buffer (StringWriterObject * data , Py_ssize_t n ) {
461+ if (unlikely (n > PY_SSIZE_T_MAX - data -> len )) {
462+ PyErr_NoMemory ();
463+ return false;
464+ }
436465 return grow_string_buffer_helper (data , data -> len + n , data -> kind );
437466}
438467
@@ -464,9 +493,8 @@ StringWriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
464493 }
465494
466495 StringWriterObject * self = (StringWriterObject * )type -> tp_alloc (type , 0 );
467- if (self != NULL ) {
496+ if (self != NULL )
468497 StringWriter_init_internal (self );
469- }
470498 return (PyObject * )self ;
471499}
472500
@@ -492,6 +520,9 @@ StringWriter_init(StringWriterObject *self, PyObject *args, PyObject *kwds)
492520 return -1 ;
493521 }
494522
523+ // Soft reset: free any heap buffer so re-initialization doesn't leak.
524+ if (self -> buf != self -> data && self -> buf != NULL )
525+ PyMem_Free (self -> buf );
495526 StringWriter_init_internal (self );
496527 return 0 ;
497528}
@@ -707,6 +738,13 @@ static void convert_string_data_in_place(char *buf, Py_ssize_t len,
707738}
708739
709740static char convert_string_buffer_kind (StringWriterObject * self , char old_kind , char new_kind ) {
741+ // new_kind is always 2 or 4, so the max representable len is PY_SSIZE_T_MAX >> {1,2}.
742+ // Use a shift instead of a division for cheap overflow check.
743+ Py_ssize_t max_len = PY_SSIZE_T_MAX >> (new_kind == 4 ? 2 : 1 );
744+ if (unlikely (self -> len > max_len )) {
745+ PyErr_NoMemory ();
746+ return CPY_NONE_ERROR ;
747+ }
710748 // Current buffer size in bytes
711749 Py_ssize_t current_buf_size = (self -> buf == self -> data ) ? WRITER_EMBEDDED_BUF_LEN : (self -> capacity * old_kind );
712750 // Needed buffer size in bytes for new kind
@@ -748,7 +786,7 @@ static char string_writer_switch_kind(StringWriterObject *self, int32_t value) {
748786static char string_append_slow_path (StringWriterObject * self , int32_t value ) {
749787 if (self -> kind == 2 ) {
750788 if ((uint32_t )value <= 0xffff ) {
751- // Kind stays the same
789+ // Fast path - kind 2 stays the same
752790 if (!ensure_string_writer_size (self , 1 ))
753791 return CPY_NONE_ERROR ;
754792 // Copy 2-byte character to buffer
@@ -757,28 +795,35 @@ static char string_append_slow_path(StringWriterObject *self, int32_t value) {
757795 self -> len ++ ;
758796 return CPY_NONE ;
759797 }
798+ // Always validate code point range before promotion (but after fast path).
799+ if (unlikely ((uint32_t )value > 0x10FFFF ))
800+ goto fail_range ;
760801 if (string_writer_switch_kind (self , value ) == CPY_NONE_ERROR )
761802 return CPY_NONE_ERROR ;
762803 return string_append_slow_path (self , value );
763804 } else if (self -> kind == 1 ) {
764805 // Check precondition -- this must only be used on slow path
765806 assert ((uint32_t )value > 0xff );
807+ if (unlikely ((uint32_t )value > 0x10FFFF ))
808+ goto fail_range ;
766809 if (string_writer_switch_kind (self , value ) == CPY_NONE_ERROR )
767810 return CPY_NONE_ERROR ;
768811 return string_append_slow_path (self , value );
769812 }
770813 assert (self -> kind == 4 );
771- if ((uint32_t )value <= 0x10FFFF ) {
772- if (!ensure_string_writer_size (self , 1 ))
773- return CPY_NONE_ERROR ;
774- // Copy 4-byte character to buffer
775- uint32_t val32 = (uint32_t )value ;
776- memcpy (self -> buf + self -> len * 4 , & val32 , 4 );
777- self -> len ++ ;
778- return CPY_NONE ;
779- }
780- // Code point is out of valid Unicode range
781- PyErr_Format (PyExc_ValueError , "code point %d is outside valid Unicode range (0-1114111)" , value );
814+ if (unlikely ((uint32_t )value > 0x10FFFF ))
815+ goto fail_range ;
816+ if (!ensure_string_writer_size (self , 1 ))
817+ return CPY_NONE_ERROR ;
818+ // Copy 4-byte character to buffer
819+ uint32_t val32 = (uint32_t )value ;
820+ memcpy (self -> buf + self -> len * 4 , & val32 , 4 );
821+ self -> len ++ ;
822+ return CPY_NONE ;
823+
824+ fail_range :
825+ PyErr_Format (PyExc_ValueError ,
826+ "code point %d is outside valid Unicode range (0-1114111)" , value );
782827 return CPY_NONE_ERROR ;
783828}
784829
0 commit comments