feat(api): implement nvim_buf_get_text (#15181)

nvim_buf_get_text is the mirror of nvim_buf_set_text. It differs from nvim_buf_get_lines in that it allows retrieving only portions of lines. While this can typically be done easily enough by API clients, implementing this function provides symmetry between the get/set text/lines APIs, and also provides a nice convenience that saves API clients the work of having to slice the result of nvim_buf_get_lines themselves.
2025-02-25 18:55:25 -06:00 · 2022-02-22 13:19:21 -07:00 · 2022-02-22 13:19:21 -07:00 · 11f7aeed7a
commit 11f7aeed7a
parent 30c9c8815b
4 changed files with 207 additions and 10 deletions
--- a/runtime/doc/api.txt
+++ b/runtime/doc/api.txt
@ -2152,6 +2152,29 @@ nvim_buf_get_option({buffer}, {name})                  *nvim_buf_get_option()*
                Return: ~
                    Option value

+                                                         *nvim_buf_get_text()*
+nvim_buf_get_text({buffer}, {start_row}, {start_col}, {end_row}, {end_col},
+                  {opts})
+                Gets a range from the buffer.
+
+                This differs from |nvim_buf_get_lines()| in that it allows
+                retrieving only portions of a line.
+
+                Indexing is zero-based. Row indices are end-inclusive, and
+                column indices are end-exclusive.
+
+                Prefer |nvim_buf_get_lines()| when retrieving entire lines.
+
+                Parameters: ~
+                    {buffer}     Buffer handle, or 0 for current buffer
+                    {start_row}  First line index
+                    {start_col}  Starting byte offset of first line
+                    {end_row}    Last line index (inclusive)
+                    {end_col}    Ending byte offset of last line (exclusive)
+                    {opts}       Optional parameters. Currently unused.
+
+                Return: ~
+                    Array of lines, or empty array for unloaded buffer.
+
 nvim_buf_get_var({buffer}, {name})                        *nvim_buf_get_var()*
                Gets a buffer-scoped (b:) variable.

--- a/src/nvim/api/buffer.c
+++ b/src/nvim/api/buffer.c
@ -287,8 +287,8 @@ ArrayOf(String) nvim_buf_get_lines(uint64_t channel_id,
  }

  bool oob = false;
-  start = normalize_index(buf, start, &oob);
-  end = normalize_index(buf, end, &oob);
+  start = normalize_index(buf, start, true, &oob);
+  end = normalize_index(buf, end, true, &oob);

  if (strict_indexing && oob) {
    api_set_error(err, kErrorTypeValidation, "Index out of bounds");
@ -374,15 +374,14 @@ void nvim_buf_set_lines(uint64_t channel_id, Buffer buffer, Integer start, Integ
  }

  bool oob = false;
-  start = normalize_index(buf, start, &oob);
-  end = normalize_index(buf, end, &oob);
+  start = normalize_index(buf, start, true, &oob);
+  end = normalize_index(buf, end, true, &oob);

  if (strict_indexing && oob) {
    api_set_error(err, kErrorTypeValidation, "Index out of bounds");
    return;
  }

-
  if (start > end) {
    api_set_error(err,
                  kErrorTypeValidation,
@ -554,13 +553,13 @@ void nvim_buf_set_text(uint64_t channel_id, Buffer buffer, Integer start_row, In

  // check range is ordered and everything!
  // start_row, end_row within buffer len (except add text past the end?)
-  start_row = normalize_index(buf, start_row, &oob);
+  start_row = normalize_index(buf, start_row, true, &oob);
  if (oob || start_row == buf->b_ml.ml_line_count + 1) {
    api_set_error(err, kErrorTypeValidation, "start_row out of bounds");
    return;
  }

-  end_row = normalize_index(buf, end_row, &oob);
+  end_row = normalize_index(buf, end_row, true, &oob);
  if (oob || end_row == buf->b_ml.ml_line_count + 1) {
    api_set_error(err, kErrorTypeValidation, "end_row out of bounds");
    return;
@ -757,6 +756,108 @@ end:
  try_end(err);
 }

+/// Gets a range from the buffer.
+///
+/// This differs from |nvim_buf_get_lines()| in that it allows retrieving only
+/// portions of a line.
+///
+/// Indexing is zero-based. Row indices are end-inclusive, and column indices
+/// are end-exclusive. Negative row indices count back from the last line (-1
+/// is the last line); negative column indices count back from the end of the
+/// line (-1 is the end of the line).
+///
+/// Prefer |nvim_buf_get_lines()| when retrieving entire lines.
+///
+/// @param channel_id
+/// @param buffer     Buffer handle, or 0 for current buffer
+/// @param start_row  First line index
+/// @param start_col  Starting byte offset of first line
+/// @param end_row    Last line index (inclusive)
+/// @param end_col    Ending byte offset of last line (exclusive)
+/// @param opts       Optional parameters. Currently unused.
+/// @param[out] err   Error details, if any
+/// @return Array of lines, or empty array for unloaded buffer.
+ArrayOf(String) nvim_buf_get_text(uint64_t channel_id, Buffer buffer,
+                                  Integer start_row, Integer start_col,
+                                  Integer end_row, Integer end_col,
+                                  Dictionary opts, Error *err)
+  FUNC_API_SINCE(9)
+{
+  Array rv = ARRAY_DICT_INIT;
+
+  if (opts.size > 0) {
+    api_set_error(err, kErrorTypeValidation, "opts dict isn't empty");
+    return rv;
+  }
+
+  buf_T *buf = find_buffer_by_handle(buffer, err);
+
+  if (!buf) {
+    return rv;
+  }
+
+  // return sentinel value if the buffer isn't loaded
+  if (buf->b_ml.ml_mfp == NULL) {
+    return rv;
+  }
+
+  bool oob = false;
+  start_row = normalize_index(buf, start_row, false, &oob);
+  end_row = normalize_index(buf, end_row, false, &oob);
+
+  // normalize_index() does not flag an index equal to the line count, so a
+  // row one past the last line would slip through. Reject it here, as
+  // nvim_buf_set_text does, instead of reading a line that does not exist.
+  const int64_t line_count = buf->b_ml.ml_line_count;
+  if (oob || start_row > line_count || end_row > line_count) {
+    api_set_error(err, kErrorTypeValidation, "Index out of bounds");
+    return rv;
+  }
+
+  // nvim_buf_get_lines doesn't care if the start row is greater than the end
+  // row (it will just return an empty array), but nvim_buf_get_text does in
+  // order to maintain symmetry with nvim_buf_set_text.
+  if (start_row > end_row) {
+    api_set_error(err, kErrorTypeValidation, "start is higher than end");
+    return rv;
+  }
+
+  // Newlines are only replaced for callers other than internal VimL calls.
+  bool replace_nl = (channel_id != VIML_INTERNAL_CALL);
+
+  // Single-row range: one slice bounded by both columns.
+  if (start_row == end_row) {
+    String line = buf_get_text(buf, start_row, start_col, end_col, replace_nl, err);
+    if (ERROR_SET(err)) {
+      return rv;
+    }
+
+    ADD(rv, STRING_OBJ(line));
+    return rv;
+  }
+
+  // Multi-row range: the result holds one entry per row, rows inclusive.
+  rv.size = (size_t)(end_row - start_row) + 1;
+  rv.items = xcalloc(rv.size, sizeof(Object));
+
+  // First row: from start_col to the end of the line.
+  rv.items[0] = STRING_OBJ(buf_get_text(buf, start_row, start_col, MAXCOL-1, replace_nl, err));
+  if (ERROR_SET(err)) {
+    goto end;
+  }
+
+  // Middle rows are whole lines, collected directly into rv.items[1..size-2]
+  // through a temporary array that aliases that part of the storage.
+  if (rv.size > 2) {
+    Array tmp = ARRAY_DICT_INIT;
+    tmp.items = &rv.items[1];
+    if (!buf_collect_lines(buf, rv.size - 2, start_row + 1, replace_nl, &tmp, err)) {
+      goto end;
+    }
+  }
+
+  // Last row: from the start of the line up to end_col.
+  rv.items[rv.size-1] = STRING_OBJ(buf_get_text(buf, end_row, 0, end_col, replace_nl, err));
+  if (ERROR_SET(err)) {
+    goto end;
+  }
+
+end:
+  // On failure, release anything collected so far and return an empty array.
+  if (ERROR_SET(err)) {
+    api_free_array(rv);
+    rv.size = 0;
+    rv.items = NULL;
+  }
+
+  return rv;
+}
+
 /// Returns the byte offset of a line (0-indexed). |api-indexing|
 ///
 /// Line 1 (index=0) has offset 0. UTF-8 bytes are counted. EOL is one byte.
@ -1386,11 +1487,11 @@ static void fix_cursor(linenr_T lo, linenr_T hi, linenr_T extra)
 }

 // Normalizes 0-based indexes to buffer line numbers
-static int64_t normalize_index(buf_T *buf, int64_t index, bool *oob)
+static int64_t normalize_index(buf_T *buf, int64_t index, bool end_exclusive, bool *oob)
 {
  int64_t line_count = buf->b_ml.ml_line_count;
  // Fix if < 0
-  index = index < 0 ? line_count + index +1 : index;
+  index = index < 0 ? line_count + index + (int)end_exclusive : index;

  // Check for oob
  if (index > line_count) {
--- a/src/nvim/api/private/helpers.c
+++ b/src/nvim/api/private/helpers.c
@ -411,7 +411,6 @@ void set_option_to(uint64_t channel_id, void *to, int type, String name, Object
  current_sctx = save_current_sctx;
 }

-
 buf_T *find_buffer_by_handle(Buffer buffer, Error *err)
 {
  if (buffer == 0) {
@ -758,6 +757,52 @@ bool buf_collect_lines(buf_T *buf, size_t n, int64_t start, bool replace_nl, Arr
  return true;
 }

+/// Extracts a byte range from a single buffer line.
+///
+/// Negative column values are offset by the line length plus one before the
+/// range is validated. A start column at or past the end of the line yields
+/// an empty string.
+///
+/// @param buf          Buffer handle
+/// @param lnum         Line number (1-based)
+/// @param start_col    Starting byte offset into line (0-based)
+/// @param end_col      Ending byte offset into line (0-based, exclusive)
+/// @param replace_nl   Replace newlines ('\n') with null ('\0')
+/// @param err          Error object
+/// @return The text between start_col and end_col on line lnum of buffer buf
+String buf_get_text(buf_T *buf, int64_t lnum, int64_t start_col, int64_t end_col, bool replace_nl,
+                    Error *err)
+{
+  String text = STRING_INIT;
+
+  if (lnum >= MAXLNUM) {
+    api_set_error(err, kErrorTypeValidation, "Line index is too high");
+    return text;
+  }
+
+  const char *line = (char *)ml_get_buf(buf, (linenr_T)lnum, false);
+  const size_t len = strlen(line);
+
+  // Shift negative columns so that they are relative to the end of the line.
+  if (start_col < 0) {
+    start_col += (int64_t)len + 1;
+  }
+  if (end_col < 0) {
+    end_col += (int64_t)len + 1;
+  }
+
+  if (start_col >= MAXCOL || end_col >= MAXCOL) {
+    api_set_error(err, kErrorTypeValidation, "Column index is too high");
+  } else if (start_col > end_col) {
+    api_set_error(err, kErrorTypeValidation, "start_col must be less than end_col");
+  } else if ((size_t)start_col < len) {
+    // Only slice when the start column lies inside the line; otherwise the
+    // empty string initialised above is returned.
+    text = cstrn_to_string(&line[start_col], (size_t)(end_col - start_col));
+    if (replace_nl) {
+      strchrsub(text.data, '\n', '\0');
+    }
+  }
+
+  return text;
+}

 void api_free_string(String value)
 {
--- a/test/functional/api/buffer_spec.lua
+++ b/test/functional/api/buffer_spec.lua
@ -537,6 +537,34 @@ describe('api/buf', function()
    end)
  end)

+  describe('nvim_buf_get_text', function()
+    local get_text = curbufmeths.get_text
+
+    -- Expects a get_text call with the given arguments to raise an error.
+    local function expect_error(...)
+      eq(false, pcall(get_text, ...))
+    end
+
+    it('works', function()
+      insert([[
+      hello foo!
+      text]])
+
+      -- Each case: { expected lines, start_row, start_col, end_row, end_col }.
+      local cases = {
+        { {'hello'}, 0, 0, 0, 5 },
+        { {'hello foo!'}, 0, 0, 0, 42 },
+        { {'foo!'}, 0, 6, 0, 10 },
+        { {'foo!', 'tex'}, 0, 6, 1, 3 },
+        { {'foo!', 'tex'}, -2, 6, -1, 3 },
+        { {''}, 0, 18, 0, 20 },
+        { {'ext'}, -1, 1, -1, 4 },
+      }
+
+      for _, case in ipairs(cases) do
+        eq(case[1], get_text(case[2], case[3], case[4], case[5], {}))
+      end
+    end)
+
+    it('errors on out-of-range', function()
+      expect_error(2, 0, 3, 0, {})
+      expect_error(0, 0, 4, 0, {})
+    end)
+
+    it('errors when start is greater than end', function()
+      expect_error(1, 0, 0, 0, {})
+      expect_error(0, 1, 0, 0, {})
+    end)
+  end)
+
  describe('nvim_buf_get_offset', function()
    local get_offset = curbufmeths.get_offset
    it('works', function()