Merge #6478 from ZyX-I/conv-fixes

Remove conversions which are not needed when &encoding is UTF-8
This commit is contained in:
Justin M. Keyes 2017-04-09 03:59:55 +02:00 committed by GitHub
commit cc8f640fb1
13 changed files with 67 additions and 311 deletions

View File

@ -4944,8 +4944,8 @@ json_decode({expr}) *json_decode()*
json_encode({expr}) *json_encode()* json_encode({expr}) *json_encode()*
Convert {expr} into a JSON string. Accepts Convert {expr} into a JSON string. Accepts
|msgpack-special-dict| as the input. Will not convert |Funcref|s, |msgpack-special-dict| as the input. Will not convert
mappings with non-string keys (can be created as |Funcref|s, mappings with non-string keys (can be created as
|msgpack-special-dict|), values with self-referencing |msgpack-special-dict|), values with self-referencing
containers, strings which contain non-UTF-8 characters, containers, strings which contain non-UTF-8 characters,
pseudo-UTF-8 strings which contain codepoints reserved for pseudo-UTF-8 strings which contain codepoints reserved for

View File

@ -5151,8 +5151,8 @@ A jump table for the options with a short description can be found at |Q_op|.
saved. When not included, the value of 'history' is used. saved. When not included, the value of 'history' is used.
*shada-c* *shada-c*
c Dummy option, kept for compatibility reasons. Has no actual c Dummy option, kept for compatibility reasons. Has no actual
effect. Current encoding state is described in effect: ShaDa always uses UTF-8 and 'encoding' value is fixed
|shada-encoding|. to UTF-8 as well.
*shada-f* *shada-f*
f Whether file marks need to be stored. If zero, file marks ('0 f Whether file marks need to be stored. If zero, file marks ('0
to '9, 'A to 'Z) are not stored. When not present or when to '9, 'A to 'Z) are not stored. When not present or when

View File

@ -1097,23 +1097,6 @@ SHADA FILE NAME *shada-file-name*
default and the name given with 'shada' or "-i" (unless it's NONE). default and the name given with 'shada' or "-i" (unless it's NONE).
CHARACTER ENCODING *shada-encoding*
The text in the ShaDa file is UTF-8-encoded. Normally you will always work
with the same 'encoding' value, and this works just fine. However, if you
read the ShaDa file with value for 'encoding' different from utf-8 and
'encoding' used when writing ShaDa file, some of the text (non-ASCII
characters) may be invalid as Neovim always attempts to convert the text in
the ShaDa file from the UTF-8 to the current 'encoding' value. Filenames are
never converted, affected elements are:
- history strings;
- variable values;
- register values;
- last used search and substitute patterns;
- last used substitute replacement string.
MANUALLY READING AND WRITING *shada-read-write* MANUALLY READING AND WRITING *shada-read-write*
Two commands can be used to read and write the ShaDa file manually. This Two commands can be used to read and write the ShaDa file manually. This
@ -1221,8 +1204,11 @@ exactly four MessagePack objects:
3. Third goes the length of the fourth entry. Unsigned integer as well, used 3. Third goes the length of the fourth entry. Unsigned integer as well, used
for fast skipping without parsing. for fast skipping without parsing.
4. Fourth is actual entry data. All currently used ShaDa entries use 4. Fourth is actual entry data. All currently used ShaDa entries use
containers to hold data: either map or array. Exact format depends on the containers to hold data: either map or array. All string values in those
entry type: containers are either binary (applies to filenames) or UTF-8, yet parser
needs to expect that invalid bytes may be present in a UTF-8 string.
Exact format depends on the entry type:
Entry type (name) Entry data ~ Entry type (name) Entry data ~
1 (Header) Map containing data that describes the generator 1 (Header) Map containing data that describes the generator

View File

@ -1,8 +1,8 @@
#ifndef NVIM_DIGRAPH_H #ifndef NVIM_DIGRAPH_H
#define NVIM_DIGRAPH_H #define NVIM_DIGRAPH_H
#include "nvim/types.h" // for char_u #include "nvim/types.h"
#include "nvim/ex_cmds_defs.h" // for exarg_T #include "nvim/ex_cmds_defs.h"
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
# include "digraph.h.generated.h" # include "digraph.h.generated.h"

View File

@ -222,8 +222,6 @@ static inline int json_decoder_pop(ValuesStackItem obj,
/// Parse JSON double-quoted string /// Parse JSON double-quoted string
/// ///
/// @param[in] conv Defines conversion necessary to convert UTF-8 string to
/// &encoding.
/// @param[in] buf Buffer being converted. /// @param[in] buf Buffer being converted.
/// @param[in] buf_len Length of the buffer. /// @param[in] buf_len Length of the buffer.
/// @param[in,out] pp Pointer to the start of the string. Must point to '"'. /// @param[in,out] pp Pointer to the start of the string. Must point to '"'.
@ -240,8 +238,7 @@ static inline int json_decoder_pop(ValuesStackItem obj,
/// value when decoder is restarted, otherwise unused. /// value when decoder is restarted, otherwise unused.
/// ///
/// @return OK in case of success, FAIL in case of error. /// @return OK in case of success, FAIL in case of error.
static inline int parse_json_string(vimconv_T *const conv, static inline int parse_json_string(const char *const buf, const size_t buf_len,
const char *const buf, const size_t buf_len,
const char **const pp, const char **const pp,
ValuesStack *const stack, ValuesStack *const stack,
ContainerStack *const container_stack, ContainerStack *const container_stack,
@ -416,20 +413,6 @@ static inline int parse_json_string(vimconv_T *const conv,
} }
PUT_FST_IN_PAIR(fst_in_pair, str_end); PUT_FST_IN_PAIR(fst_in_pair, str_end);
#undef PUT_FST_IN_PAIR #undef PUT_FST_IN_PAIR
if (conv->vc_type != CONV_NONE) {
size_t str_len = (size_t) (str_end - str);
char *const new_str = (char *) string_convert(conv, (char_u *) str,
&str_len);
if (new_str == NULL) {
emsgf(_("E474: Failed to convert string \"%.*s\" from UTF-8"),
(int) str_len, str);
xfree(str);
goto parse_json_string_fail;
}
xfree(str);
str = new_str;
str_end = new_str + str_len;
}
if (hasnul) { if (hasnul) {
typval_T obj; typval_T obj;
list_T *const list = tv_list_alloc(); list_T *const list = tv_list_alloc();
@ -626,9 +609,6 @@ int json_decode_string(const char *const buf, const size_t buf_len,
EMSG(_("E474: Attempt to decode a blank string")); EMSG(_("E474: Attempt to decode a blank string"));
return FAIL; return FAIL;
} }
vimconv_T conv = { .vc_type = CONV_NONE };
convert_setup(&conv, (char_u *) "utf-8", p_enc);
conv.vc_fail = true;
int ret = OK; int ret = OK;
ValuesStack stack = KV_INITIAL_VALUE; ValuesStack stack = KV_INITIAL_VALUE;
ContainerStack container_stack = KV_INITIAL_VALUE; ContainerStack container_stack = KV_INITIAL_VALUE;
@ -774,7 +754,7 @@ json_decode_string_cycle_start:
break; break;
} }
case '"': { case '"': {
if (parse_json_string(&conv, buf, buf_len, &p, &stack, &container_stack, if (parse_json_string(buf, buf_len, &p, &stack, &container_stack,
&next_map_special, &didcomma, &didcolon) &next_map_special, &didcomma, &didcolon)
== FAIL) { == FAIL) {
// Error message was already given // Error message was already given

View File

@ -11,7 +11,7 @@
#include <math.h> #include <math.h>
#include "nvim/eval/encode.h" #include "nvim/eval/encode.h"
#include "nvim/buffer_defs.h" // vimconv_T #include "nvim/buffer_defs.h"
#include "nvim/eval.h" #include "nvim/eval.h"
#include "nvim/eval/typval.h" #include "nvim/eval/typval.h"
#include "nvim/garray.h" #include "nvim/garray.h"
@ -29,10 +29,6 @@
#define utf_ptr2char(b) utf_ptr2char((char_u *)b) #define utf_ptr2char(b) utf_ptr2char((char_u *)b)
#define utf_ptr2len(b) ((size_t)utf_ptr2len((char_u *)b)) #define utf_ptr2len(b) ((size_t)utf_ptr2len((char_u *)b))
#define utf_char2len(b) ((size_t)utf_char2len(b)) #define utf_char2len(b) ((size_t)utf_char2len(b))
#define string_convert(a, b, c) \
((char *)string_convert((vimconv_T *)a, (char_u *)b, c))
#define convert_setup(vcp, from, to) \
(convert_setup(vcp, (char_u *)from, (char_u *)to))
const char *const encode_special_var_names[] = { const char *const encode_special_var_names[] = {
[kSpecialVarNull] = "null", [kSpecialVarNull] = "null",
@ -537,17 +533,6 @@ int encode_read_from_list(ListReaderState *const state, char *const buf,
} \ } \
} while (0) } while (0)
/// Last used p_enc value
///
/// Generic pointer: it is not used as a string, only pointer comparisons are
/// performed. Must not be freed.
static const void *last_p_enc = NULL;
/// Conversion setup for converting from last_p_enc to UTF-8
static vimconv_T p_enc_conv = {
.vc_type = CONV_NONE,
};
/// Escape sequences used in JSON /// Escape sequences used in JSON
static const char escapes[][3] = { static const char escapes[][3] = {
[BS] = "\\b", [BS] = "\\b",
@ -579,33 +564,15 @@ static inline int convert_to_json_string(garray_T *const gap,
} else { } else {
size_t utf_len = len; size_t utf_len = len;
char *tofree = NULL; char *tofree = NULL;
if (last_p_enc != (const void *) p_enc) {
p_enc_conv.vc_type = CONV_NONE;
convert_setup(&p_enc_conv, p_enc, "utf-8");
p_enc_conv.vc_fail = true;
last_p_enc = p_enc;
}
if (p_enc_conv.vc_type != CONV_NONE) {
tofree = string_convert(&p_enc_conv, buf, &utf_len);
if (tofree == NULL) {
emsgf(_("E474: Failed to convert string \"%.*s\" to UTF-8"),
utf_len, utf_buf);
return FAIL;
}
utf_buf = tofree;
}
size_t str_len = 0; size_t str_len = 0;
// Encode character as \u0000 if // Encode character as \uNNNN if
// 1. It is an ASCII control character (0x0 .. 0x1F, 0x7F). // 1. It is an ASCII control character (0x0 .. 0x1F; 0x7F not
// 2. &encoding is not UTF-8 and code point is above 0x7F. // utf_printable and thus not checked specially).
// 3. &encoding is UTF-8 and code point is not printable according to // 2. Code point is not printable according to utf_printable().
// utf_printable(). // This is done to make resulting values displayable on screen also not from
// This is done to make it possible to :echo values when &encoding is not // Neovim.
// UTF-8. #define ENCODE_RAW(ch) \
#define ENCODE_RAW(p_enc_conv, ch) \ (ch >= 0x20 && utf_printable(ch))
(ch >= 0x20 && (p_enc_conv.vc_type == CONV_NONE \
? utf_printable(ch) \
: ch < 0x7F))
for (size_t i = 0; i < utf_len;) { for (size_t i = 0; i < utf_len;) {
const int ch = utf_ptr2char(utf_buf + i); const int ch = utf_ptr2char(utf_buf + i);
const size_t shift = (ch == 0? 1: utf_ptr2len(utf_buf + i)); const size_t shift = (ch == 0? 1: utf_ptr2len(utf_buf + i));
@ -636,7 +603,7 @@ static inline int convert_to_json_string(garray_T *const gap,
utf_len - (i - shift), utf_buf + i - shift); utf_len - (i - shift), utf_buf + i - shift);
xfree(tofree); xfree(tofree);
return FAIL; return FAIL;
} else if (ENCODE_RAW(p_enc_conv, ch)) { } else if (ENCODE_RAW(ch)) {
str_len += shift; str_len += shift;
} else { } else {
str_len += ((sizeof("\\u1234") - 1) str_len += ((sizeof("\\u1234") - 1)
@ -666,7 +633,7 @@ static inline int convert_to_json_string(garray_T *const gap,
break; break;
} }
default: { default: {
if (ENCODE_RAW(p_enc_conv, ch)) { if (ENCODE_RAW(ch)) {
ga_concat_len(gap, utf_buf + i, shift); ga_concat_len(gap, utf_buf + i, shift);
} else if (ch < SURROGATE_FIRST_CHAR) { } else if (ch < SURROGATE_FIRST_CHAR) {
ga_concat_len(gap, ((const char[]) { ga_concat_len(gap, ((const char[]) {

View File

@ -6,8 +6,8 @@
#include "nvim/os/time.h" #include "nvim/os/time.h"
#include "nvim/pos.h" #include "nvim/pos.h"
#include "nvim/eval/typval.h" #include "nvim/eval/typval.h"
#include "nvim/buffer_defs.h" // for buf_T and win_T #include "nvim/buffer_defs.h"
#include "nvim/ex_cmds_defs.h" // for exarg_T #include "nvim/ex_cmds_defs.h"
// flags for do_ecmd() // flags for do_ecmd()
#define ECMD_HIDE 0x01 // don't free the current buffer #define ECMD_HIDE 0x01 // don't free the current buffer

View File

@ -3,9 +3,9 @@
#include "nvim/eval/typval.h" #include "nvim/eval/typval.h"
#include "nvim/ex_cmds.h" #include "nvim/ex_cmds.h"
#include "nvim/ex_cmds_defs.h" // for exarg_T #include "nvim/ex_cmds_defs.h"
#include "nvim/os/time.h" // for Timestamp #include "nvim/os/time.h"
#include "nvim/regexp_defs.h" // for regmatch_T #include "nvim/regexp_defs.h"
/* Values for nextwild() and ExpandOne(). See ExpandOne() for meaning. */ /* Values for nextwild() and ExpandOne(). See ExpandOne() for meaning. */
#define WILD_FREE 1 #define WILD_FREE 1

View File

@ -1,12 +1,12 @@
#ifndef NVIM_FOLD_H #ifndef NVIM_FOLD_H
#define NVIM_FOLD_H #define NVIM_FOLD_H
#include <stdio.h> // for FILE #include <stdio.h>
#include "nvim/pos.h" #include "nvim/pos.h"
#include "nvim/garray.h" // for garray_T #include "nvim/garray.h"
#include "nvim/types.h" // for char_u #include "nvim/types.h"
#include "nvim/buffer_defs.h" // for win_T #include "nvim/buffer_defs.h"
/* /*
* Info used to pass info about a fold from the fold-detection code to the * Info used to pass info about a fold from the fold-detection code to the

View File

@ -3,10 +3,10 @@
#include <stdint.h> #include <stdint.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdlib.h> // for size_t #include <stdlib.h>
#include "nvim/types.h" #include "nvim/types.h"
#include "nvim/pos.h" // for linenr_T #include "nvim/pos.h"
/// A block number. /// A block number.
/// ///

View File

@ -2,8 +2,8 @@
#define NVIM_MOVE_H #define NVIM_MOVE_H
#include <stdbool.h> #include <stdbool.h>
#include "nvim/buffer_defs.h" // for win_T #include "nvim/buffer_defs.h"
#include "nvim/pos.h" // for linenr_T #include "nvim/pos.h"
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
# include "move.h.generated.h" # include "move.h.generated.h"

View File

@ -73,15 +73,10 @@ KHASH_SET_INIT_STR(strset)
(vim_rename((char_u *)a, (char_u *)b)) (vim_rename((char_u *)a, (char_u *)b))
#define mb_strnicmp(a, b, c) \ #define mb_strnicmp(a, b, c) \
(mb_strnicmp((char_u *)a, (char_u *)b, c)) (mb_strnicmp((char_u *)a, (char_u *)b, c))
#define has_non_ascii(a) (has_non_ascii((char_u *)a))
#define string_convert(a, b, c) \
((char *)string_convert((vimconv_T *)a, (char_u *)b, c))
#define path_shorten_fname_if_possible(b) \ #define path_shorten_fname_if_possible(b) \
((char *)path_shorten_fname_if_possible((char_u *)b)) ((char *)path_shorten_fname_if_possible((char_u *)b))
#define buflist_new(ffname, sfname, ...) \ #define buflist_new(ffname, sfname, ...) \
(buflist_new((char_u *)ffname, (char_u *)sfname, __VA_ARGS__)) (buflist_new((char_u *)ffname, (char_u *)sfname, __VA_ARGS__))
#define convert_setup(vcp, from, to) \
(convert_setup(vcp, (char_u *)from, (char_u *)to))
#define os_isdir(f) (os_isdir((char_u *) f)) #define os_isdir(f) (os_isdir((char_u *) f))
#define regtilde(s, m) ((char *) regtilde((char_u *) s, m)) #define regtilde(s, m) ((char *) regtilde((char_u *) s, m))
#define path_tail_with_sep(f) ((char *) path_tail_with_sep((char_u *)f)) #define path_tail_with_sep(f) ((char *) path_tail_with_sep((char_u *)f))
@ -413,8 +408,6 @@ typedef struct sd_read_def {
const char *error; ///< Error message in case of error. const char *error; ///< Error message in case of error.
uintmax_t fpos; ///< Current position (amount of bytes read since uintmax_t fpos; ///< Current position (amount of bytes read since
///< reader structure initialization). May overflow. ///< reader structure initialization). May overflow.
vimconv_T sd_conv; ///< Structure used for converting encodings of some
///< items.
} ShaDaReadDef; } ShaDaReadDef;
struct sd_write_def; struct sd_write_def;
@ -435,8 +428,6 @@ typedef struct sd_write_def {
ShaDaWriteCloser close; ///< Close function. ShaDaWriteCloser close; ///< Close function.
void *cookie; ///< Data describing object written to. void *cookie; ///< Data describing object written to.
const char *error; ///< Error message in case of error. const char *error; ///< Error message in case of error.
vimconv_T sd_conv; ///< Structure used for converting encodings of some
///< items.
} ShaDaWriteDef; } ShaDaWriteDef;
#ifdef INCLUDE_GENERATED_DECLARATIONS #ifdef INCLUDE_GENERATED_DECLARATIONS
@ -803,7 +794,7 @@ static int open_shada_file_for_reading(const char *const fname,
return error; return error;
} }
convert_setup(&sd_reader->sd_conv, "utf-8", p_enc); assert(STRCMP(p_enc, "utf-8") == 0);
return 0; return 0;
} }
@ -1899,127 +1890,24 @@ shada_pack_entry_error:
} }
#undef PACK_STRING #undef PACK_STRING
/// Write single ShaDa entry, converting it if needed /// Write single ShaDa entry and free it afterwards
/// ///
/// @warning Frees entry after packing. /// Will not free if entry could not be freed.
/// ///
/// @param[in] packer Packer used to write entry. /// @param[in] packer Packer used to write entry.
/// @param[in] sd_conv Conversion definitions. /// @param[in] entry Entry written.
/// @param[in] entry Entry written. If entry.can_free_entry is false then
/// it assumes that entry was not converted, otherwise it
/// is assumed that entry was already converted.
/// @param[in] max_kbyte Maximum size of an item in KiB. Zero means no /// @param[in] max_kbyte Maximum size of an item in KiB. Zero means no
/// restrictions. /// restrictions.
static ShaDaWriteResult shada_pack_encoded_entry(msgpack_packer *const packer, static inline ShaDaWriteResult shada_pack_pfreed_entry(
const vimconv_T *const sd_conv, msgpack_packer *const packer, PossiblyFreedShadaEntry entry,
PossiblyFreedShadaEntry entry, const size_t max_kbyte)
const size_t max_kbyte) FUNC_ATTR_NONNULL_ALL FUNC_ATTR_ALWAYS_INLINE
FUNC_ATTR_NONNULL_ALL
{ {
ShaDaWriteResult ret = kSDWriteSuccessfull; ShaDaWriteResult ret = kSDWriteSuccessfull;
ret = shada_pack_entry(packer, entry.data, max_kbyte);
if (entry.can_free_entry) { if (entry.can_free_entry) {
ret = shada_pack_entry(packer, entry.data, max_kbyte);
shada_free_shada_entry(&entry.data); shada_free_shada_entry(&entry.data);
return ret;
} }
#define RUN_WITH_CONVERTED_STRING(cstr, code) \
do { \
bool did_convert = false; \
if (sd_conv->vc_type != CONV_NONE && has_non_ascii((cstr))) { \
char *const converted_string = string_convert(sd_conv, (cstr), NULL); \
if (converted_string != NULL) { \
(cstr) = converted_string; \
did_convert = true; \
} \
} \
code \
if (did_convert) { \
xfree((cstr)); \
} \
} while (0)
switch (entry.data.type) {
case kSDItemUnknown:
case kSDItemMissing: {
assert(false);
}
case kSDItemSearchPattern: {
RUN_WITH_CONVERTED_STRING(entry.data.data.search_pattern.pat, {
ret = shada_pack_entry(packer, entry.data, max_kbyte);
});
break;
}
case kSDItemHistoryEntry: {
RUN_WITH_CONVERTED_STRING(entry.data.data.history_item.string, {
ret = shada_pack_entry(packer, entry.data, max_kbyte);
});
break;
}
case kSDItemSubString: {
RUN_WITH_CONVERTED_STRING(entry.data.data.sub_string.sub, {
ret = shada_pack_entry(packer, entry.data, max_kbyte);
});
break;
}
case kSDItemVariable: {
if (sd_conv->vc_type != CONV_NONE) {
typval_T tgttv;
var_item_copy(sd_conv, &entry.data.data.global_var.value, &tgttv,
true, 0);
tv_clear(&entry.data.data.global_var.value);
entry.data.data.global_var.value = tgttv;
}
ret = shada_pack_entry(packer, entry.data, max_kbyte);
break;
}
case kSDItemRegister: {
bool did_convert = false;
if (sd_conv->vc_type != CONV_NONE) {
size_t first_non_ascii = 0;
for (size_t i = 0; i < entry.data.data.reg.contents_size; i++) {
if (has_non_ascii(entry.data.data.reg.contents[i])) {
first_non_ascii = i;
did_convert = true;
break;
}
}
if (did_convert) {
entry.data.data.reg.contents =
xmemdup(entry.data.data.reg.contents,
(entry.data.data.reg.contents_size
* sizeof(entry.data.data.reg.contents[0])));
for (size_t i = 0; i < entry.data.data.reg.contents_size; i++) {
if (i >= first_non_ascii) {
entry.data.data.reg.contents[i] = get_converted_string(
sd_conv,
entry.data.data.reg.contents[i],
strlen(entry.data.data.reg.contents[i]));
} else {
entry.data.data.reg.contents[i] =
xstrdup(entry.data.data.reg.contents[i]);
}
}
}
}
ret = shada_pack_entry(packer, entry.data, max_kbyte);
if (did_convert) {
for (size_t i = 0; i < entry.data.data.reg.contents_size; i++) {
xfree(entry.data.data.reg.contents[i]);
}
xfree(entry.data.data.reg.contents);
}
break;
}
case kSDItemHeader:
case kSDItemGlobalMark:
case kSDItemJump:
case kSDItemBufferList:
case kSDItemLocalMark:
case kSDItemChange: {
ret = shada_pack_entry(packer, entry.data, max_kbyte);
break;
}
}
#undef RUN_WITH_CONVERTED_STRING
return ret; return ret;
} }
@ -2556,11 +2444,7 @@ static ShaDaWriteResult shada_write(ShaDaWriteDef *const sd_writer,
break; break;
} }
typval_T tgttv; typval_T tgttv;
if (sd_writer->sd_conv.vc_type != CONV_NONE) { tv_copy(&vartv, &tgttv);
var_item_copy(&sd_writer->sd_conv, &vartv, &tgttv, true, 0);
} else {
tv_copy(&vartv, &tgttv);
}
ShaDaWriteResult spe_ret; ShaDaWriteResult spe_ret;
if ((spe_ret = shada_pack_entry(packer, (ShadaEntry) { if ((spe_ret = shada_pack_entry(packer, (ShadaEntry) {
.type = kSDItemVariable, .type = kSDItemVariable,
@ -2811,9 +2695,8 @@ static ShaDaWriteResult shada_write(ShaDaWriteDef *const sd_writer,
do { \ do { \
for (size_t i_ = 0; i_ < ARRAY_SIZE(wms_array); i_++) { \ for (size_t i_ = 0; i_ < ARRAY_SIZE(wms_array); i_++) { \
if (wms_array[i_].data.type != kSDItemMissing) { \ if (wms_array[i_].data.type != kSDItemMissing) { \
if (shada_pack_encoded_entry(packer, &sd_writer->sd_conv, \ if (shada_pack_pfreed_entry(packer, wms_array[i_], max_kbyte) \
wms_array[i_], \ == kSDWriteFailed) { \
max_kbyte) == kSDWriteFailed) { \
ret = kSDWriteFailed; \ ret = kSDWriteFailed; \
goto shada_write_exit; \ goto shada_write_exit; \
} \ } \
@ -2823,8 +2706,8 @@ static ShaDaWriteResult shada_write(ShaDaWriteDef *const sd_writer,
PACK_WMS_ARRAY(wms->global_marks); PACK_WMS_ARRAY(wms->global_marks);
PACK_WMS_ARRAY(wms->registers); PACK_WMS_ARRAY(wms->registers);
for (size_t i = 0; i < wms->jumps_size; i++) { for (size_t i = 0; i < wms->jumps_size; i++) {
if (shada_pack_encoded_entry(packer, &sd_writer->sd_conv, wms->jumps[i], if (shada_pack_pfreed_entry(packer, wms->jumps[i], max_kbyte)
max_kbyte) == kSDWriteFailed) { == kSDWriteFailed) {
ret = kSDWriteFailed; ret = kSDWriteFailed;
goto shada_write_exit; goto shada_write_exit;
} }
@ -2832,8 +2715,8 @@ static ShaDaWriteResult shada_write(ShaDaWriteDef *const sd_writer,
#define PACK_WMS_ENTRY(wms_entry) \ #define PACK_WMS_ENTRY(wms_entry) \
do { \ do { \
if (wms_entry.data.type != kSDItemMissing) { \ if (wms_entry.data.type != kSDItemMissing) { \
if (shada_pack_encoded_entry(packer, &sd_writer->sd_conv, wms_entry, \ if (shada_pack_pfreed_entry(packer, wms_entry, max_kbyte) \
max_kbyte) == kSDWriteFailed) { \ == kSDWriteFailed) { \
ret = kSDWriteFailed; \ ret = kSDWriteFailed; \
goto shada_write_exit; \ goto shada_write_exit; \
} \ } \
@ -2860,9 +2743,8 @@ static ShaDaWriteResult shada_write(ShaDaWriteDef *const sd_writer,
for (size_t i = 0; i < file_markss_to_dump; i++) { for (size_t i = 0; i < file_markss_to_dump; i++) {
PACK_WMS_ARRAY(all_file_markss[i]->marks); PACK_WMS_ARRAY(all_file_markss[i]->marks);
for (size_t j = 0; j < all_file_markss[i]->changes_size; j++) { for (size_t j = 0; j < all_file_markss[i]->changes_size; j++) {
if (shada_pack_encoded_entry(packer, &sd_writer->sd_conv, if (shada_pack_pfreed_entry(packer, all_file_markss[i]->changes[j],
all_file_markss[i]->changes[j], max_kbyte) == kSDWriteFailed) {
max_kbyte) == kSDWriteFailed) {
ret = kSDWriteFailed; ret = kSDWriteFailed;
goto shada_write_exit; goto shada_write_exit;
} }
@ -2886,8 +2768,8 @@ static ShaDaWriteResult shada_write(ShaDaWriteDef *const sd_writer,
if (dump_one_history[i]) { if (dump_one_history[i]) {
hms_insert_whole_neovim_history(&wms->hms[i]); hms_insert_whole_neovim_history(&wms->hms[i]);
HMS_ITER(&wms->hms[i], cur_entry, { HMS_ITER(&wms->hms[i], cur_entry, {
if (shada_pack_encoded_entry( if (shada_pack_pfreed_entry(
packer, &sd_writer->sd_conv, (PossiblyFreedShadaEntry) { packer, (PossiblyFreedShadaEntry) {
.data = cur_entry->data, .data = cur_entry->data,
.can_free_entry = cur_entry->can_free_entry, .can_free_entry = cur_entry->can_free_entry,
}, max_kbyte) == kSDWriteFailed) { }, max_kbyte) == kSDWriteFailed) {
@ -3038,8 +2920,6 @@ shada_write_file_nomerge: {}
verbose_leave(); verbose_leave();
} }
convert_setup(&sd_writer.sd_conv, p_enc, "utf-8");
const ShaDaWriteResult sw_ret = shada_write(&sd_writer, (nomerge const ShaDaWriteResult sw_ret = shada_write(&sd_writer, (nomerge
? NULL ? NULL
: &sd_reader)); : &sd_reader));
@ -3327,29 +3207,6 @@ static ShaDaReadResult msgpack_read_uint64(ShaDaReadDef *const sd_reader,
return kSDReadStatusSuccess; return kSDReadStatusSuccess;
} }
/// Convert or copy and return a string
///
/// @param[in] sd_conv Conversion definition.
/// @param[in] str String to convert.
/// @param[in] len String length.
///
/// @return [allocated] converted string or copy of the original string.
static inline char *get_converted_string(const vimconv_T *const sd_conv,
const char *const str,
const size_t len)
FUNC_ATTR_NONNULL_ALL FUNC_ATTR_MALLOC FUNC_ATTR_WARN_UNUSED_RESULT
{
if (!has_non_ascii_len(str, len)) {
return xmemdupz(str, len);
}
size_t new_len = len;
char *const new_str = string_convert(sd_conv, str, &new_len);
if (new_str == NULL) {
return xmemdupz(str, len);
}
return new_str;
}
#define READERR(entry_name, error_desc) \ #define READERR(entry_name, error_desc) \
RERR "Error while reading ShaDa file: " \ RERR "Error while reading ShaDa file: " \
entry_name " entry at position %" PRIu64 " " \ entry_name " entry at position %" PRIu64 " " \
@ -3427,10 +3284,7 @@ static inline char *get_converted_string(const vimconv_T *const sd_conv,
sizeof(*unpacked.data.via.map.ptr)); \ sizeof(*unpacked.data.via.map.ptr)); \
ad_ga.ga_len++; \ ad_ga.ga_len++; \
} }
#define CONVERTED(str, len) ( \ #define CONVERTED(str, len) (xmemdupz((str), (len)))
sd_reader->sd_conv.vc_type != CONV_NONE \
? get_converted_string(&sd_reader->sd_conv, (str), (len)) \
: xmemdupz((str), (len)))
#define BIN_CONVERTED(b) CONVERTED(b.ptr, b.size) #define BIN_CONVERTED(b) CONVERTED(b.ptr, b.size)
#define SET_ADDITIONAL_DATA(tgt, name) \ #define SET_ADDITIONAL_DATA(tgt, name) \
do { \ do { \
@ -3803,30 +3657,14 @@ shada_read_next_item_start:
(char) unpacked.data.via.array.ptr[2].via.u64; (char) unpacked.data.via.array.ptr[2].via.u64;
} }
size_t strsize; size_t strsize;
if (sd_reader->sd_conv.vc_type == CONV_NONE strsize = (
|| !has_non_ascii_len(unpacked.data.via.array.ptr[1].via.bin.ptr, unpacked.data.via.array.ptr[1].via.bin.size
unpacked.data.via.array.ptr[1].via.bin.size)) { + 1 // Zero byte
shada_read_next_item_hist_no_conv: + 1); // Separator character
strsize = ( entry->data.history_item.string = xmalloc(strsize);
unpacked.data.via.array.ptr[1].via.bin.size memcpy(entry->data.history_item.string,
+ 1 // Zero byte unpacked.data.via.array.ptr[1].via.bin.ptr,
+ 1); // Separator character unpacked.data.via.array.ptr[1].via.bin.size);
entry->data.history_item.string = xmalloc(strsize);
memcpy(entry->data.history_item.string,
unpacked.data.via.array.ptr[1].via.bin.ptr,
unpacked.data.via.array.ptr[1].via.bin.size);
} else {
size_t len = unpacked.data.via.array.ptr[1].via.bin.size;
char *const converted = string_convert(
&sd_reader->sd_conv, unpacked.data.via.array.ptr[1].via.bin.ptr,
&len);
if (converted != NULL) {
strsize = len + 2;
entry->data.history_item.string = xrealloc(converted, strsize);
} else {
goto shada_read_next_item_hist_no_conv;
}
}
entry->data.history_item.string[strsize - 2] = 0; entry->data.history_item.string[strsize - 2] = 0;
entry->data.history_item.string[strsize - 1] = entry->data.history_item.string[strsize - 1] =
entry->data.history_item.sep; entry->data.history_item.sep;
@ -3859,16 +3697,6 @@ shada_read_next_item_hist_no_conv:
"be converted to the VimL value")), initial_fpos); "be converted to the VimL value")), initial_fpos);
goto shada_read_next_item_error; goto shada_read_next_item_error;
} }
if (sd_reader->sd_conv.vc_type != CONV_NONE) {
typval_T tgttv;
var_item_copy(&sd_reader->sd_conv,
&entry->data.global_var.value,
&tgttv,
true,
0);
tv_clear(&entry->data.global_var.value);
entry->data.global_var.value = tgttv;
}
SET_ADDITIONAL_ELEMENTS(unpacked.data.via.array, 2, SET_ADDITIONAL_ELEMENTS(unpacked.data.via.array, 2,
entry->data.global_var.additional_elements, entry->data.global_var.additional_elements,
"variable"); "variable");

View File

@ -2,7 +2,6 @@ local helpers = require('test.unit.helpers')(after_each)
local itp = helpers.gen_itp(it) local itp = helpers.gen_itp(it)
local cimport = helpers.cimport local cimport = helpers.cimport
local to_cstr = helpers.to_cstr
local eq = helpers.eq local eq = helpers.eq
local neq = helpers.neq local neq = helpers.neq
local ffi = helpers.ffi local ffi = helpers.ffi
@ -72,7 +71,7 @@ describe('json_decode_string()', function()
end end
itp('does not overflow in error messages', function() itp('does not overflow in error messages', function()
local saved_p_enc = decode.p_enc collectgarbage('restart')
check_failure(']test', 1, 'E474: No container to close: ]') check_failure(']test', 1, 'E474: No container to close: ]')
check_failure('[}test', 2, 'E474: Closing list with curly bracket: }') check_failure('[}test', 2, 'E474: Closing list with curly bracket: }')
check_failure('{]test', 2, check_failure('{]test', 2,
@ -105,10 +104,6 @@ describe('json_decode_string()', function()
check_failure('"\194"test', 3, 'E474: Only UTF-8 strings allowed: \194"') check_failure('"\194"test', 3, 'E474: Only UTF-8 strings allowed: \194"')
check_failure('"\252\144\128\128\128\128"test', 8, 'E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \252\144\128\128\128\128"') check_failure('"\252\144\128\128\128\128"test', 8, 'E474: Only UTF-8 code points up to U+10FFFF are allowed to appear unescaped: \252\144\128\128\128\128"')
check_failure('"test', 1, 'E474: Expected string end: "') check_failure('"test', 1, 'E474: Expected string end: "')
decode.p_enc = to_cstr('latin1')
check_failure('"\\uABCD"test', 8,
'E474: Failed to convert string "ꯍ" from UTF-8')
decode.p_enc = saved_p_enc
check_failure('-test', 1, 'E474: Missing number after minus sign: -') check_failure('-test', 1, 'E474: Missing number after minus sign: -')
check_failure('-1.test', 3, 'E474: Missing number after decimal dot: -1.') check_failure('-1.test', 3, 'E474: Missing number after decimal dot: -1.')
check_failure('-1.0etest', 5, 'E474: Missing exponent: -1.0e') check_failure('-1.0etest', 5, 'E474: Missing exponent: -1.0e')