Merge pull request #3655 from bfredl/enc_default

Default to encoding=utf-8
This commit is contained in:
Justin M. Keyes 2016-01-02 18:05:52 -05:00
commit 59eaba2894
7 changed files with 38 additions and 116 deletions

View File

@ -2130,7 +2130,7 @@ A jump table for the options with a short description can be found at |Q_op|.
'edcompatible' 'ed' Removed. |vim-differences| {Nvim}
*'encoding'* *'enc'* *E543*
'encoding' 'enc' string (default: "utf-8" or value from $LANG)
'encoding' 'enc' string (default: "utf-8")
global
{only available when compiled with the |+multi_byte|
feature}
@ -2152,10 +2152,6 @@ A jump table for the options with a short description can be found at |Q_op|.
can use: >
if has("multi_byte_encoding")
<
Normally 'encoding' will be equal to your current locale. This will
be the default if Vim recognizes your environment settings, otherwise
"utf-8" is used.
When you set this option, it fires the |EncodingChanged| autocommand
event so that you can set up fonts if necessary.
@ -2172,9 +2168,6 @@ A jump table for the options with a short description can be found at |Q_op|.
setting 'encoding' to one of these values instead of utf-8 only has
effect for encoding used for files when 'fileencoding' is empty.
When 'encoding' is set to a Unicode encoding, and 'fileencodings' was
not set yet, the default for 'fileencodings' is changed.
*'endofline'* *'eol'* *'noendofline'* *'noeol'*
'endofline' 'eol' boolean (default on)
local to buffer
@ -2345,9 +2338,7 @@ A jump table for the options with a short description can be found at |Q_op|.
old short name was 'fe', which is no longer used.
*'fileencodings'* *'fencs'*
'fileencodings' 'fencs' string (default: "ucs-bom",
"ucs-bom,utf-8,default,latin1" when
'encoding' is set to a Unicode value)
'fileencodings' 'fencs' string (default: "ucs-bom,utf-8,default,latin1")
global
{only available when compiled with the |+multi_byte|
feature}
@ -2387,9 +2378,8 @@ A jump table for the options with a short description can be found at |Q_op|.
because Vim cannot detect an error, thus the encoding is always
accepted.
The special value "default" can be used for the encoding from the
environment. This is the default value for 'encoding'. It is useful
when 'encoding' is set to "utf-8" and your environment uses a
non-latin1 encoding, such as Russian.
environment. It is useful when 'encoding' is set to "utf-8" and
your environment uses a non-latin1 encoding, such as Russian.
When 'encoding' is "utf-8" and a file contains an illegal byte
sequence it won't be recognized as UTF-8. You can use the |8g8|
command to find the illegal byte sequence.
@ -3776,10 +3766,8 @@ A jump table for the options with a short description can be found at |Q_op|.
change 'iskeyword' instead.
*'iskeyword'* *'isk'*
'iskeyword' 'isk' string (Vim default for
Win32: @,48-57,_,128-167,224-235
otherwise: @,48-57,_,192-255
Vi default: @,48-57,_)
'iskeyword' 'isk' string (default: @,48-57,_,192-255
Vi default: @,48-57,_)
local to buffer
Keywords are used in searching and recognizing with many commands:
"w", "*", "[i", etc. It is also used for "\k" in a |pattern|. See
@ -3791,8 +3779,7 @@ A jump table for the options with a short description can be found at |Q_op|.
When the 'lisp' option is on the '-' character is always included.
*'isprint'* *'isp'*
'isprint' 'isp' string (default for MS-DOS, Win32, and Macintosh:
"@,~-255"; otherwise: "@,161-255")
'isprint' 'isp' string (default: "@,161-255")
global
The characters given by this option are displayed directly on the
screen. It is also used for "\p" in a |pattern|. The characters from

View File

@ -798,6 +798,8 @@ EXTERN bool enc_utf8 INIT(= false); /* UTF-8 encoded Unicode */
EXTERN int enc_latin1like INIT(= TRUE); /* 'encoding' is latin1 comp. */
EXTERN int has_mbyte INIT(= 0); /* any multi-byte encoding */
/// Encoding used when 'fencs' is set to "default"
EXTERN char_u *fenc_default INIT(= NULL);
/*
* To speed up BYTELEN() we fill a table with the byte lengths whenever

View File

@ -568,11 +568,6 @@ char_u * mb_init(void)
/* When enc_utf8 is set or reset, (de)allocate ScreenLinesUC[] */
screenalloc(false);
/* When using Unicode, set default for 'fileencodings'. */
if (enc_utf8 && !option_was_set((char_u *)"fencs"))
set_string_option_direct((char_u *)"fencs", -1,
(char_u *)"ucs-bom,utf-8,default,latin1", OPT_FREE, 0);
#ifdef HAVE_WORKING_LIBINTL
/* GNU gettext 0.10.37 supports this feature: set the codeset used for
* translated messages independently from the current locale. */
@ -2417,11 +2412,8 @@ char_u *enc_canonize(char_u *enc) FUNC_ATTR_NONNULL_RET
int i;
if (STRCMP(enc, "default") == 0) {
/* Use the default encoding as it's found by set_init_1(). */
char_u *r = get_encoding_default();
if (r == NULL)
r = (char_u *)"latin1";
return vim_strsave(r);
// Use the default encoding as found by set_init_1().
return vim_strsave(fenc_default);
}
/* copy "enc" to allocated memory, with room for two '-' */

View File

@ -233,12 +233,6 @@ typedef struct vimoption {
#define P_CURSWANT 0x2000000U /* update curswant required; not needed when
* there is a redraw flag */
#define ISK_LATIN1 (char_u *)"@,48-57,_,192-255"
/* 'isprint' for latin1 is also used for MS-Windows cp1252, where 0x80 is used
* for the currency sign. */
# define ISP_LATIN1 (char_u *)"@,161-255"
#define HIGHLIGHT_INIT \
"8:SpecialKey,~:EndOfBuffer,z:TermCursor,Z:TermCursorNC,@:NonText," \
"d:Directory,e:ErrorMsg,i:IncSearch,l:Search,m:MoreMsg,M:ModeMsg,n:LineNr," \
@ -776,59 +770,18 @@ void set_init_1(void)
/* Parse default for 'listchars'. */
(void)set_chars_option(&p_lcs);
/* enc_locale() will try to find the encoding of the current locale. */
// enc_locale() will try to find the encoding of the current locale.
// This will be used when 'default' is used as encoding specifier
// in 'fileencodings'
char_u *p = enc_locale();
if (p != NULL) {
char_u *save_enc;
/* Try setting 'encoding' and check if the value is valid.
* If not, go back to the default "utf-8". */
save_enc = p_enc;
p_enc = (char_u *) p;
if (STRCMP(p_enc, "gb18030") == 0) {
/* We don't support "gb18030", but "cp936" is a good substitute
* for practical purposes, thus use that. It's not an alias to
* still support conversion between gb18030 and utf-8. */
p_enc = vim_strsave((char_u *)"cp936");
xfree(p);
}
if (mb_init() == NULL) {
opt_idx = findoption((char_u *)"encoding");
if (opt_idx >= 0) {
options[opt_idx].def_val[VI_DEFAULT] = p_enc;
options[opt_idx].flags |= P_DEF_ALLOCED;
}
#if defined(MSWIN) || defined(MACOS)
if (STRCMP(p_enc, "latin1") == 0
|| enc_utf8
) {
/* Adjust the default for 'isprint' and 'iskeyword' to match
* latin1. */
set_string_option_direct((char_u *)"isp", -1,
ISP_LATIN1, OPT_FREE, SID_NONE);
set_string_option_direct((char_u *)"isk", -1,
ISK_LATIN1, OPT_FREE, SID_NONE);
opt_idx = findoption((char_u *)"isp");
if (opt_idx >= 0)
options[opt_idx].def_val[VIM_DEFAULT] = ISP_LATIN1;
opt_idx = findoption((char_u *)"isk");
if (opt_idx >= 0)
options[opt_idx].def_val[VIM_DEFAULT] = ISK_LATIN1;
(void)init_chartab();
}
#endif
} else {
xfree(p_enc);
// mb_init() failed; fallback to utf8 and try again.
p_enc = save_enc;
mb_init();
}
} else {
// enc_locale() failed; initialize the default (utf8).
mb_init();
if (p == NULL) {
// use utf-8 as 'default' if locale encoding can't be detected.
p = vim_strsave((char_u *)"utf-8");
}
fenc_default = p;
// Initialize multibyte (utf-8) handling
mb_init();
// Don't change &encoding when resetting to defaults with ":set all&".
opt_idx = findoption((char_u *)"encoding");
@ -4669,16 +4622,6 @@ char_u *get_highlight_default(void)
return (char_u *)NULL;
}
/// Look up the Vi default value of the 'encoding' option.
///
/// @return the VI_DEFAULT entry of the option's default values, or NULL when
///         the option table has no "enc" entry.
char_u *get_encoding_default(void)
{
  int enc_idx = findoption((char_u *)"enc");

  // findoption() reports a missing option with a negative index.
  if (enc_idx < 0) {
    return (char_u *)NULL;
  }
  return options[enc_idx].def_val[VI_DEFAULT];
}
/*
* Translate a string like "t_xx", "<t_xx>" or "<S-Tab>" to a key number.
*/

View File

@ -748,7 +748,7 @@ return {
type='string', list='comma', scope={'global'},
vi_def=true,
varname='p_fencs',
defaults={if_true={vi="ucs-bom"}}
defaults={if_true={vi="ucs-bom,utf-8,default,latin1"}}
},
{
full_name='fileformat', abbreviation='ff',
@ -1285,7 +1285,7 @@ return {
vim=true,
alloced=true,
varname='p_isk',
defaults={if_true={vi="@,48-57,_", vim=macros('ISK_LATIN1')}}
defaults={if_true={vi="@,48-57,_", vim="@,48-57,_,192-255"}}
},
{
full_name='isprint', abbreviation='isp',
@ -1294,10 +1294,7 @@ return {
vi_def=true,
redraw={'all_windows'},
varname='p_isp',
defaults={
condition='MSWIN',
if_true={vi="@,~-255"},
if_false={vi=macros("ISP_LATIN1")}
defaults={if_true={vi="@,161-255"}
}
},
{

View File

@ -21,20 +21,21 @@ describe('&encoding', function()
eq(3, eval('strwidth("Bär")'))
end)
it('is not changed by `set all&`', function()
-- we need to set &encoding to something non-default
-- use 'latin1' when enc&vi is 'utf-8', 'utf-8' otherwise
execute('set fenc=default')
local enc_default, enc_other, width = eval('&fenc'), 'utf-8', 3
if enc_default == 'utf-8' then
enc_other = 'latin1'
width = 4 -- utf-8 string 'Bär' will count as 4 latin1 chars
end
-- Checks that 'encoding' given to clear() sticks, and that a later
-- `:set encoding=` in the running session does not change it.
it('can be changed before startup', function()
-- NOTE(review): clear() presumably restarts nvim and applies this command
-- during startup (as the test name suggests) -- confirm against helpers.
clear('set enc=latin1')
-- Try to switch the encoding in the running session; the assertions below
-- expect this attempt to have no effect.
execute('set encoding=utf-8')
-- error message expected
-- The <cr> is sent right after the failing command, presumably to dismiss
-- the error prompt before evaluating expressions.
feed('<cr>')
eq('latin1', eval('&encoding'))
-- 'Bär' is 4 bytes in UTF-8, so a width of 4 means every byte is counted
-- as a separate latin1 character.
eq(4, eval('strwidth("Bär")'))
end)
clear('set enc=' .. enc_other)
it('is not changed by `set all&`', function()
-- we need to set &encoding to something non-default. Use 'latin1'
clear('set enc=latin1')
execute('set all&')
eq(enc_other, eval('&encoding'))
eq(width, eval('strwidth("Bär")'))
eq('latin1', eval('&encoding'))
eq(4, eval('strwidth("Bär")'))
end)
end)

View File

@ -8,7 +8,7 @@ local Session = require('nvim.session')
local nvim_prog = os.getenv('NVIM_PROG') or 'build/bin/nvim'
local nvim_argv = {nvim_prog, '-u', 'NONE', '-i', 'NONE', '-N',
'--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 encoding=utf-8 undodir=. directory=. viewdir=. backupdir=.',
'--cmd', 'set shortmess+=I background=light noswapfile noautoindent laststatus=1 undodir=. directory=. viewdir=. backupdir=.',
'--embed'}
-- Formulate a path to the directory containing nvim. We use this to