Merge pull request #13145 from janlazo/vim-8.2.0901

vim-patch:8.2.{901,912}
This commit is contained in:
Jan Edmund Lazo 2020-10-23 12:29:05 -04:00 committed by GitHub
commit 2786d96fac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 362 additions and 82 deletions

View File

@ -1615,6 +1615,10 @@ B When joining lines, don't insert a space between two multi-byte
characters. Overruled by the 'M' flag.
1 Don't break a line after a one-letter word. It's broken before it
instead (if possible).
] Respect textwidth rigorously. With this flag set, no line can be
longer than textwidth, unless line-break-prohibition rules make this
impossible. Mainly for CJK scripts and works only if 'encoding' is
"utf-8".
j Where it makes sense, remove a comment leader when joining lines. For
example, joining:
int i; // the index ~

View File

@ -142,7 +142,6 @@ static void changed_common(linenr_T lnum, colnr_T col, linenr_T lnume,
long xtra)
{
int i;
int cols;
pos_T *p;
int add;
@ -170,7 +169,7 @@ static void changed_common(linenr_T lnum, colnr_T col, linenr_T lnume,
if (p->lnum != lnum) {
add = true;
} else {
cols = comp_textwidth(false);
int cols = comp_textwidth(false);
if (cols == 0) {
cols = 79;
}

View File

@ -5549,13 +5549,11 @@ void insertchar(
int second_indent // indent for second line if >= 0
)
{
int textwidth;
char_u *p;
int fo_ins_blank;
int force_format = flags & INSCHAR_FORMAT;
textwidth = comp_textwidth(force_format);
fo_ins_blank = has_format_option(FO_INS_BLANK);
const int textwidth = comp_textwidth(force_format);
const bool fo_ins_blank = has_format_option(FO_INS_BLANK);
/*
* Try to break the line in two or more pieces when:
@ -5756,10 +5754,11 @@ internal_format (
int cc;
int save_char = NUL;
bool haveto_redraw = false;
int fo_ins_blank = has_format_option(FO_INS_BLANK);
int fo_multibyte = has_format_option(FO_MBYTE_BREAK);
int fo_white_par = has_format_option(FO_WHITE_PAR);
int first_line = TRUE;
const bool fo_ins_blank = has_format_option(FO_INS_BLANK);
const bool fo_multibyte = has_format_option(FO_MBYTE_BREAK);
const bool fo_rigor_tw = has_format_option(FO_RIGOROUS_TW);
const bool fo_white_par = has_format_option(FO_WHITE_PAR);
bool first_line = true;
colnr_T leader_len;
bool no_leader = false;
int do_comments = (flags & INSCHAR_DO_COM);
@ -5838,6 +5837,7 @@ internal_format (
curwin->w_cursor.col = startcol;
foundcol = 0;
int skip_pos = 0;
/*
* Find position to break at.
@ -5907,7 +5907,11 @@ internal_format (
foundcol = curwin->w_cursor.col;
if (curwin->w_cursor.col <= (colnr_T)wantcol)
break;
} else if (cc >= 0x100 && fo_multibyte) {
} else if ((cc >= 0x100 || !utf_allow_break_before(cc))
&& fo_multibyte) {
int ncc;
bool allow_break;
// Break after or before a multi-byte character.
if (curwin->w_cursor.col != startcol) {
// Don't break until after the comment leader
@ -5916,8 +5920,11 @@ internal_format (
}
col = curwin->w_cursor.col;
inc_cursor();
// Don't change end_foundcol if already set.
if (foundcol != curwin->w_cursor.col) {
ncc = gchar_cursor();
allow_break = utf_allow_break(cc, ncc);
// If we have already checked this position, skip!
if (curwin->w_cursor.col != skip_pos && allow_break) {
foundcol = curwin->w_cursor.col;
end_foundcol = foundcol;
if (curwin->w_cursor.col <= (colnr_T)wantcol)
@ -5929,6 +5936,7 @@ internal_format (
if (curwin->w_cursor.col == 0)
break;
ncc = cc;
col = curwin->w_cursor.col;
dec_cursor();
@ -5937,17 +5945,56 @@ internal_format (
if (WHITECHAR(cc)) {
continue; // break with space
}
// Don't break until after the comment leader
// Don't break until after the comment leader.
if (curwin->w_cursor.col < leader_len) {
break;
}
curwin->w_cursor.col = col;
skip_pos = curwin->w_cursor.col;
foundcol = curwin->w_cursor.col;
end_foundcol = foundcol;
if (curwin->w_cursor.col <= (colnr_T)wantcol)
break;
allow_break = utf_allow_break(cc, ncc);
// Must handle this to respect line break prohibition.
if (allow_break) {
foundcol = curwin->w_cursor.col;
end_foundcol = foundcol;
}
if (curwin->w_cursor.col <= (colnr_T)wantcol) {
const bool ncc_allow_break = utf_allow_break_before(ncc);
if (allow_break) {
break;
}
if (!ncc_allow_break && !fo_rigor_tw) {
// Enable at most 1 punct hang outside of textwidth.
if (curwin->w_cursor.col == startcol) {
// We are inserting a non-breakable char, postpone
// line break check to next insert.
end_foundcol = foundcol = 0;
break;
}
// Neither cc nor ncc is NUL if we are here, so
// it's safe to inc_cursor.
col = curwin->w_cursor.col;
inc_cursor();
cc = ncc;
ncc = gchar_cursor();
// handle insert
ncc = (ncc != NUL) ? ncc : c;
allow_break = utf_allow_break(cc, ncc);
if (allow_break) {
// Break only when we are not at end of line.
end_foundcol = foundcol = ncc == NUL? 0 : curwin->w_cursor.col;
break;
}
curwin->w_cursor.col = col;
}
}
}
if (curwin->w_cursor.col == 0)
break;
@ -6049,7 +6096,7 @@ internal_format (
}
}
}
first_line = FALSE;
first_line = false;
}
if (State & VREPLACE_FLAG) {
@ -6236,12 +6283,10 @@ static void check_auto_format(
* Set default to window width (maximum 79) for "gq" operator.
*/
int comp_textwidth(
int ff // force formatting (for "gq" command)
bool ff // force formatting (for "gq" command)
)
{
int textwidth;
textwidth = curbuf->b_p_tw;
int textwidth = curbuf->b_p_tw;
if (textwidth == 0 && curbuf->b_p_wm) {
// The width is the window width minus 'wrapmargin' minus all the
// things that add to the margin.

View File

@ -1624,6 +1624,146 @@ int utf_head_off(const char_u *base, const char_u *p)
return (int)(p - q);
}
/// Check whether no space is allowed before/after the character "cc".
///
/// @param cc  Code point to classify.
///
/// @return true when "cc" falls inside one of the punctuation ranges tested
///         below, false otherwise.
bool utf_eat_space(int cc)
  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
{
  // General punctuation
  if (cc >= 0x2000 && cc <= 0x206f) {
    return true;
  }
  // Supplemental punctuation
  if (cc >= 0x2e00 && cc <= 0x2e7f) {
    return true;
  }
  // CJK symbols and punctuation
  if (cc >= 0x3000 && cc <= 0x303f) {
    return true;
  }
  // Full width ASCII punctuation, first run
  if (cc >= 0xff01 && cc <= 0xff0f) {
    return true;
  }
  // Full width ASCII punctuation, second run
  if (cc >= 0xff1a && cc <= 0xff20) {
    return true;
  }
  // Full width ASCII punctuation, third run
  if (cc >= 0xff3b && cc <= 0xff40) {
    return true;
  }
  // Full width ASCII punctuation, last run (upper bound is 0xff65)
  return cc >= 0xff5b && cc <= 0xff65;
}
/// Check whether a line break is allowed before the character "cc".
///
/// @param cc  Code point that would become the first character of a line.
///
/// @return false when "cc" is listed in the table of punctuation below,
///         true for every other value.
bool utf_allow_break_before(int cc)
  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
{
  // Must stay sorted in ascending order: it is searched with a binary search.
  static const int BOL_prohibition_punct[] = {
    '!',
    '%',
    ')',
    ',',
    ':',
    ';',
    '>',
    '?',
    ']',
    '}',
    0x2019,  // ’ right single quotation mark
    0x201d,  // ” right double quotation mark
    0x2020,  // † dagger
    0x2021,  // ‡ double dagger
    0x2026,  // … horizontal ellipsis
    0x2030,  // ‰ per mille sign
    0x2031,  // ‱ per ten thousand sign
    0x203c,  // ‼ double exclamation mark
    0x2047,  // ⁇ double question mark
    0x2048,  // ⁈ question exclamation mark
    0x2049,  // ⁉ exclamation question mark
    0x2103,  // ℃ degree celsius
    0x2109,  // ℉ degree fahrenheit
    0x3001,  // 、 ideographic comma
    0x3002,  // 。 ideographic full stop
    0x3009,  // 〉 right angle bracket
    0x300b,  // 》 right double angle bracket
    0x300d,  // 」 right corner bracket
    0x300f,  // 』 right white corner bracket
    0x3011,  // 】 right black lenticular bracket
    0x3015,  // 〕 right tortoise shell bracket
    0x3017,  // 〗 right white lenticular bracket
    0x3019,  // 〙 right white tortoise shell bracket
    0x301b,  // 〛 right white square bracket
    0xff01,  // ！ fullwidth exclamation mark
    0xff09,  // ） fullwidth right parenthesis
    0xff0c,  // ， fullwidth comma
    0xff0e,  // ． fullwidth full stop
    0xff1a,  // ： fullwidth colon
    0xff1b,  // ； fullwidth semicolon
    0xff1f,  // ？ fullwidth question mark
    0xff3d,  // ］ fullwidth right square bracket
    0xff5d,  // ｝ fullwidth right curly bracket
  };

  int lo = 0;
  int hi = (int)ARRAY_SIZE(BOL_prohibition_punct) - 1;

  // Plain membership test: a hit means the break is prohibited.
  while (lo <= hi) {
    const int mid = lo + (hi - lo) / 2;
    const int probe = BOL_prohibition_punct[mid];

    if (probe == cc) {
      return false;
    }
    if (probe < cc) {
      lo = mid + 1;
    } else {
      hi = mid - 1;
    }
  }

  return true;
}
/// Check whether a line break is allowed after the character "cc".
///
/// @param cc  Code point that would become the last character of a line.
///
/// @return false when "cc" is listed in the table of punctuation below,
///         true for every other value.
bool utf_allow_break_after(int cc)
  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
{
  // Must stay sorted in ascending order: it is searched with a binary search.
  static const int EOL_prohibition_punct[] = {
    '(',
    '<',
    '[',
    '`',
    '{',
    // 0x2014,  // — em dash (not in the table)
    0x2018,  // ‘ left single quotation mark
    0x201c,  // “ left double quotation mark
    // 0x2053,  // ⁓ swung dash (not in the table)
    0x3008,  // 〈 left angle bracket
    0x300a,  // 《 left double angle bracket
    0x300c,  // 「 left corner bracket
    0x300e,  // 『 left white corner bracket
    0x3010,  // 【 left black lenticular bracket
    0x3014,  // 〔 left tortoise shell bracket
    0x3016,  // 〖 left white lenticular bracket
    0x3018,  // 〘 left white tortoise shell bracket
    0x301a,  // 〚 left white square bracket
    0xff08,  // （ fullwidth left parenthesis
    0xff3b,  // ［ fullwidth left square bracket
    0xff5b,  // ｛ fullwidth left curly bracket
  };

  int lo = 0;
  int hi = (int)ARRAY_SIZE(EOL_prohibition_punct) - 1;

  // Plain membership test: a hit means the break is prohibited.
  while (lo <= hi) {
    const int mid = lo + (hi - lo) / 2;
    const int probe = EOL_prohibition_punct[mid];

    if (probe == cc) {
      return false;
    }
    if (probe < cc) {
      lo = mid + 1;
    } else {
      hi = mid - 1;
    }
  }

  return true;
}
/// Check whether a line break is allowed between "cc" and "ncc".
///
/// @param cc   Code point that would end the line.
/// @param ncc  Code point that would start the next line.
///
/// @return true when breaking after "cc" and before "ncc" are both allowed
///         and the pair is not a doubled em dash or horizontal ellipsis.
bool utf_allow_break(int cc, int ncc)
  FUNC_ATTR_CONST FUNC_ATTR_WARN_UNUSED_RESULT
{
  // Two-letter punctuation (em dash 0x2014 or horizontal ellipsis 0x2026
  // written twice) must not be split.
  const bool is_doubled_punct = cc == ncc && (cc == 0x2014 || cc == 0x2026);
  if (is_doubled_punct) {
    return false;
  }

  if (!utf_allow_break_after(cc)) {
    return false;
  }
  return utf_allow_break_before(ncc);
}
/// Copy a character, advancing the pointers
///
/// @param[in,out] fp Source of the character to copy.

View File

@ -3833,7 +3833,8 @@ int do_join(size_t count,
&& (!has_format_option(FO_MBYTE_JOIN)
|| (utf_ptr2char(curr) < 0x100 && endcurr1 < 0x100))
&& (!has_format_option(FO_MBYTE_JOIN2)
|| utf_ptr2char(curr) < 0x100 || endcurr1 < 0x100)
|| (utf_ptr2char(curr) < 0x100 && !utf_eat_space(endcurr1))
|| (endcurr1 < 0x100 && !utf_eat_space(utf_ptr2char(curr))))
) {
/* don't add a space if the line is ending in a space */
if (endcurr1 == ' ')
@ -4158,49 +4159,41 @@ format_lines(
int avoid_fex /* don't use 'formatexpr' */
)
{
int max_len;
int is_not_par; /* current line not part of parag. */
int next_is_not_par; /* next line not part of paragraph */
int is_end_par; /* at end of paragraph */
int prev_is_end_par = FALSE; /* prev. line not part of parag. */
int next_is_start_par = FALSE;
int leader_len = 0; /* leader len of current line */
int next_leader_len; /* leader len of next line */
char_u *leader_flags = NULL; /* flags for leader of current line */
char_u *next_leader_flags; /* flags for leader of next line */
int do_comments; /* format comments */
int do_comments_list = 0; /* format comments with 'n' or '2' */
int advance = TRUE;
int second_indent = -1; /* indent for second line (comment
* aware) */
int do_second_indent;
int do_number_indent;
int do_trail_white;
int first_par_line = TRUE;
bool is_not_par; // current line not part of parag.
bool next_is_not_par; // next line not part of paragraph
bool is_end_par; // at end of paragraph
bool prev_is_end_par = false; // prev. line not part of parag.
bool next_is_start_par = false;
int leader_len = 0; // leader len of current line
int next_leader_len; // leader len of next line
char_u *leader_flags = NULL; // flags for leader of current line
char_u *next_leader_flags; // flags for leader of next line
bool advance = true;
int second_indent = -1; // indent for second line (comment aware)
bool first_par_line = true;
int smd_save;
long count;
int need_set_indent = TRUE; /* set indent of next paragraph */
int force_format = FALSE;
int old_State = State;
bool need_set_indent = true; // set indent of next paragraph
bool force_format = false;
const int old_State = State;
/* length of a line to force formatting: 3 * 'tw' */
max_len = comp_textwidth(TRUE) * 3;
// length of a line to force formatting: 3 * 'tw'
const int max_len = comp_textwidth(true) * 3;
/* check for 'q', '2' and '1' in 'formatoptions' */
do_comments = has_format_option(FO_Q_COMS);
do_second_indent = has_format_option(FO_Q_SECOND);
do_number_indent = has_format_option(FO_Q_NUMBER);
do_trail_white = has_format_option(FO_WHITE_PAR);
// check for 'q', '2' and '1' in 'formatoptions'
const bool do_comments = has_format_option(FO_Q_COMS); // format comments
int do_comments_list = 0; // format comments with 'n' or '2'
const bool do_second_indent = has_format_option(FO_Q_SECOND);
const bool do_number_indent = has_format_option(FO_Q_NUMBER);
const bool do_trail_white = has_format_option(FO_WHITE_PAR);
/*
* Get info about the previous and current line.
*/
if (curwin->w_cursor.lnum > 1)
is_not_par = fmt_check_par(curwin->w_cursor.lnum - 1
, &leader_len, &leader_flags, do_comments
);
else
is_not_par = TRUE;
// Get info about the previous and current line.
if (curwin->w_cursor.lnum > 1) {
is_not_par = fmt_check_par(curwin->w_cursor.lnum - 1,
&leader_len, &leader_flags, do_comments);
} else {
is_not_par = true;
}
next_is_not_par = fmt_check_par(curwin->w_cursor.lnum
, &next_leader_len, &next_leader_flags, do_comments
);
@ -4225,7 +4218,7 @@ format_lines(
* The last line to be formatted.
*/
if (count == 1 || curwin->w_cursor.lnum == curbuf->b_ml.ml_line_count) {
next_is_not_par = TRUE;
next_is_not_par = true;
next_leader_len = 0;
next_leader_flags = NULL;
} else {
@ -4236,7 +4229,7 @@ format_lines(
next_is_start_par =
(get_number_indent(curwin->w_cursor.lnum + 1) > 0);
}
advance = TRUE;
advance = true;
is_end_par = (is_not_par || next_is_not_par || next_is_start_par);
if (!is_end_par && do_trail_white)
is_end_par = !ends_in_white(curwin->w_cursor.lnum);
@ -4287,7 +4280,7 @@ format_lines(
leader_len, leader_flags,
next_leader_len, next_leader_flags)
)
is_end_par = TRUE;
is_end_par = true;
/*
* If we have got to the end of a paragraph, or the line is
@ -4324,9 +4317,9 @@ format_lines(
* end of the paragraph. */
if (line_count < 0)
break;
first_par_line = TRUE;
first_par_line = true;
}
force_format = FALSE;
force_format = false;
}
/*
@ -4334,7 +4327,7 @@ format_lines(
* first delete the leader from the second line.
*/
if (!is_end_par) {
advance = FALSE;
advance = false;
curwin->w_cursor.lnum++;
curwin->w_cursor.col = 0;
if (line_count < 0 && u_save_cursor() == FAIL)
@ -4357,12 +4350,13 @@ format_lines(
beep_flush();
break;
}
first_par_line = FALSE;
/* If the line is getting long, format it next time */
if (STRLEN(get_cursor_line_ptr()) > (size_t)max_len)
force_format = TRUE;
else
force_format = FALSE;
first_par_line = false;
// If the line is getting long, format it next time
if (STRLEN(get_cursor_line_ptr()) > (size_t)max_len) {
force_format = true;
} else {
force_format = false;
}
}
}
line_breakcheck();
@ -4423,11 +4417,10 @@ static int fmt_check_par(linenr_T lnum, int *leader_len, char_u **leader_flags,
int paragraph_start(linenr_T lnum)
{
char_u *p;
int leader_len = 0; /* leader len of current line */
char_u *leader_flags = NULL; /* flags for leader of current line */
int next_leader_len = 0; /* leader len of next line */
char_u *next_leader_flags = NULL; /* flags for leader of next line */
int do_comments; /* format comments */
int leader_len = 0; // leader len of current line
char_u *leader_flags = NULL; // flags for leader of current line
int next_leader_len = 0; // leader len of next line
char_u *next_leader_flags = NULL; // flags for leader of next line
if (lnum <= 1)
return TRUE; /* start of the file */
@ -4436,7 +4429,7 @@ int paragraph_start(linenr_T lnum)
if (*p == NUL)
return TRUE; /* after empty line */
do_comments = has_format_option(FO_Q_COMS);
const bool do_comments = has_format_option(FO_Q_COMS); // format comments
if (fmt_check_par(lnum - 1, &leader_len, &leader_flags, do_comments)) {
return true; // after non-paragraph line
}

View File

@ -6803,7 +6803,8 @@ static void langmap_set(void)
/// Return true if format option 'x' is in effect.
/// Take care of no formatting when 'paste' is set.
int has_format_option(int x)
bool has_format_option(int x)
FUNC_ATTR_PURE FUNC_ATTR_WARN_UNUSED_RESULT
{
if (p_paste) {
return false;

View File

@ -77,12 +77,13 @@
#define FO_ONE_LETTER '1'
#define FO_WHITE_PAR 'w' // trailing white space continues paragr.
#define FO_AUTO 'a' // automatic formatting
#define FO_RIGOROUS_TW ']' // respect textwidth rigorously
#define FO_REMOVE_COMS 'j' // remove comment leaders when joining lines
#define FO_PERIOD_ABBR 'p' // don't break a single space after a period
#define DFLT_FO_VI "vt"
#define DFLT_FO_VIM "tcqj"
#define FO_ALL "tcroq2vlb1mMBn,awjp" // for do_set()
#define FO_ALL "tcroq2vlb1mMBn,aw]jp" // for do_set()
// characters for the p_cpo option:
#define CPO_ALTREAD 'a' // ":read" sets alternate file name

View File

@ -0,0 +1,97 @@
scriptencoding utf-8
" Format a CJK line with "gqq" once for every punctuation character in the
" list and check where the first line ends.
" a:rigorous: when true, expect the line to be cut short so that it fits in
"             'textwidth'; otherwise expect the punctuation character to stay
"             on the first line.
func Run_cjk_linebreak_after(rigorous)
  set textwidth=12
  for punct in [
        \ '!', '%', ')', ',', ':', ';', '>', '?', ']', '}', '’', '”', '†', '‡',
        \ '…', '‰', '‱', '‼', '⁇', '⁈', '⁉', '℃', '℉', '、', '。', '〉', '》',
        \ '」', '』', '】', '〕', '〗', '〙', '〛', '！', '）', '，', '．', '：',
        \ '；', '？', '］', '｝']
    call setline('.', '这是一个测试' .. punct .. '试试 CJK 行禁则补丁。')
    normal gqq
    if a:rigorous
      call assert_equal('这是一个测', getline(1))
    else
      call assert_equal('这是一个测试' .. punct, getline(1))
    endif
    " Empty the buffer for the next punctuation character.
    %d_
  endfor
endfunc
" Run the "after" scenario with 'formatoptions' that do not contain ']'.
func Test_cjk_linebreak_after()
  let &formatoptions = 'croqn2mB1j'
  call Run_cjk_linebreak_after(0)
endfunc
" Run the "after" scenario with ']' added to 'formatoptions'.
func Test_cjk_linebreak_after_rigorous()
  let &formatoptions = 'croqn2mB1j]'
  call Run_cjk_linebreak_after(1)
endfunc
" Format a CJK line with "gqq" once for every punctuation character in the
" list and check that the first line ends just before that character.
func Run_cjk_linebreak_before()
  set textwidth=12
  for punct in [
        \ '(', '<', '[', '`', '{', '‘', '“', '〈', '《', '「', '『', '【', '〔',
        \ '〖', '〘', '〚', '（', '［', '｛']
    call setline('.', '这是个测试' .. punct .. '试试 CJK 行禁则补丁。')
    normal gqq
    call assert_equal('这是个测试', getline(1))
    " Empty the buffer for the next punctuation character.
    %d_
  endfor
endfunc
" Run the "before" scenario with 'formatoptions' that do not contain ']'.
func Test_cjk_linebreak_before()
  let &formatoptions = 'croqn2mB1j'
  call Run_cjk_linebreak_before()
endfunc
" Run the "before" scenario with ']' added to 'formatoptions'.
func Test_cjk_linebreak_before_rigorous()
  let &formatoptions = 'croqn2mB1j]'
  call Run_cjk_linebreak_before()
endfunc
" Format three CJK lines with "gqq" and check where the first line ends:
"   1. a doubled ellipsis (……) that still fits; it must not start a line, so
"      the expected result depends on a:rigorous
"   2. the same with one more character in front of the ……
"   3. a doubled em dash (——), which may start a line
" a:rigorous: when true, expect case 1 to be cut short instead of keeping
"             the …… on the first line.
func Run_cjk_linebreak_nobetween(rigorous)
  set textwidth=12 ambiwidth=double

  let l:ellipsis_expected = a:rigorous ? '这是个测' : '这是个测试……'
  let l:cases = [
        \ ['这是个测试……试试 CJK 行禁则补丁。', l:ellipsis_expected],
        \ ['这是一个测试……试试 CJK 行禁则补丁。', '这是一个测'],
        \ ['这是个测试——试试 CJK 行禁则补丁。', '这是个测试'],
        \ ]

  for l:idx in range(len(l:cases))
    if l:idx > 0
      " Empty the buffer left behind by the previous case.
      %d_
    endif
    let [l:text, l:expected] = l:cases[l:idx]
    call setline('.', l:text)
    normal gqq
    call assert_equal(l:expected, getline(1))
  endfor
endfunc
" Run the "nobetween" scenario with 'formatoptions' that do not contain ']'.
func Test_cjk_linebreak_nobetween()
  let &formatoptions = 'croqn2mB1j'
  call Run_cjk_linebreak_nobetween(0)
endfunc
" Run the "nobetween" scenario with ']' added to 'formatoptions'.
func Test_cjk_linebreak_nobetween_rigorous()
  let &formatoptions = 'croqn2mB1j]'
  call Run_cjk_linebreak_nobetween(1)
endfunc
" Join a line ending in CJK punctuation with a following ASCII line using "J"
" and check that no space is inserted after the punctuation.
func Test_cjk_linebreak_join_punct()
  " The option value is the same for every iteration, so set it once.
  set formatoptions=croqn2mB1j
  for punct in ['——', '〗', '，', '。', '……']
    call setline(1, '文本文本' .. punct)
    call setline(2, 'English')
    normal ggJ
    call assert_equal('文本文本' .. punct .. 'English', getline(1))
    " Empty the buffer for the next punctuation string.
    %d_
  endfor
endfunc