mirror of
https://github.com/discourse/discourse.git
synced 2025-02-25 18:55:32 -06:00
auto replace rules in titles
This commit is contained in:
45
lib/text_cleaner.rb
Normal file
45
lib/text_cleaner.rb
Normal file
@@ -0,0 +1,45 @@
|
||||
#
|
||||
# Clean up a text
|
||||
#
|
||||
class TextCleaner

  # Builds the option hash used when cleaning topic titles.
  #
  # cf. http://meta.discourse.org/t/should-we-have-auto-replace-rules-in-titles/5687
  #
  # The prettifying rules are switched on by SiteSetting.title_prettify;
  # removing the space before the end punctuation additionally requires
  # SiteSetting.default_locale to be "en". Fixing interior spaces and
  # stripping surrounding whitespace are always enabled.
  #
  # @return [Hash] option flags understood by TextCleaner.clean
  def self.title_options
    {
      deduplicate_exclamation_marks: SiteSetting.title_prettify,
      deduplicate_question_marks: SiteSetting.title_prettify,
      replace_all_upper_case: SiteSetting.title_prettify,
      capitalize_first_letter: SiteSetting.title_prettify,
      remove_unnecessary_period: SiteSetting.title_prettify,
      remove_extraneous_space: SiteSetting.title_prettify && SiteSetting.default_locale == "en",
      fixes_interior_spaces: true,
      strip_whitespaces: true
    }
  end

  # Cleans a title using the rules returned by TextCleaner.title_options.
  #
  # @param title [String, nil] the raw title
  # @return [String, nil] the cleaned title
  def self.clean_title(title)
    TextCleaner.clean(title, TextCleaner.title_options)
  end

  # Applies the enabled clean-up rules to a text, in a fixed order.
  #
  # The argument itself is never modified: the rules run on a private copy,
  # so frozen strings are accepted and the caller's string keeps its value.
  #
  # @param text [String, nil] the text to clean
  # @param opts [Hash] rule flags; a rule runs only when its key is truthy
  #   (:deduplicate_exclamation_marks, :deduplicate_question_marks,
  #   :replace_all_upper_case, :capitalize_first_letter,
  #   :remove_unnecessary_period, :remove_extraneous_space,
  #   :fixes_interior_spaces, :strip_whitespaces)
  # @return [String, nil] a new cleaned string, or nil when text is nil
  def self.clean(text, opts = {})
    # Nothing to clean; avoids a NoMethodError on a missing text
    return text if text.nil?

    # Work on a copy so the bang methods below never touch the caller's string
    text = text.dup

    # Replace !!!!! with a single !
    text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]
    # Replace ????? with a single ?
    text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
    # Replace all-caps text with regular case letters
    # (only when it contains at least one ASCII capital and no lower case letter)
    text.tr!('A-Z', 'a-z') if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
    # Capitalize first letter
    text.sub!(/\A([a-z])/) { |first| first.capitalize } if opts[:capitalize_first_letter]
    # Remove unnecessary period at the end (a trailing "..." is left alone)
    text.sub!(/([^.])\.(\s*)\z/, '\1\2') if opts[:remove_unnecessary_period]
    # Remove extraneous space before the end punctuation
    text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space]
    # Fixes interior spaces
    text.gsub!(/ +/, ' ') if opts[:fixes_interior_spaces]
    # Strip whitespaces
    text.strip! if opts[:strip_whitespaces]

    text
  end

end
|
||||
@@ -1,31 +1,27 @@
|
||||
#
|
||||
# Given a string, tell us whether or not it is acceptable. Also, remove stuff we don't like
|
||||
# such as leading / trailing space.
|
||||
# Given a string, tell us whether or not it is acceptable.
|
||||
#
|
||||
class TextSentinel
|
||||
|
||||
attr_accessor :text
|
||||
|
||||
def initialize(text, opts=nil)
|
||||
@opts = opts || {}
|
||||
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') if text.present?
|
||||
end
|
||||
|
||||
def self.non_symbols_regexp
|
||||
/[\ -\/\[-\`\:-\@\{-\~]/m
|
||||
end
|
||||
|
||||
def initialize(text, opts=nil)
|
||||
@opts = opts || {}
|
||||
|
||||
if text.present?
|
||||
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
||||
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
|
||||
@text.strip! if @opts[:strip]
|
||||
end
|
||||
def self.body_sentinel(text)
|
||||
TextSentinel.new(text, min_entropy: SiteSetting.body_min_entropy)
|
||||
end
|
||||
|
||||
def self.title_sentinel(text)
|
||||
TextSentinel.new(text,
|
||||
min_entropy: SiteSetting.title_min_entropy,
|
||||
max_word_length: SiteSetting.max_word_length,
|
||||
remove_interior_spaces: true,
|
||||
strip: true)
|
||||
max_word_length: SiteSetting.max_word_length)
|
||||
end
|
||||
|
||||
# Entropy is a number of how many unique characters the string needs.
|
||||
@@ -35,7 +31,6 @@ class TextSentinel
|
||||
end
|
||||
|
||||
def valid?
|
||||
|
||||
# Blank strings are not valid
|
||||
return false if @text.blank? || @text.strip.blank?
|
||||
|
||||
@@ -47,12 +42,12 @@ class TextSentinel
|
||||
return false if non_symbols == 0
|
||||
|
||||
# Don't allow super long strings without spaces
|
||||
|
||||
return false if @opts[:max_word_length] && @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/
|
||||
|
||||
# We don't allow all upper case content in english
|
||||
return false if (@text =~ /[A-Z]+/) && (@text == @text.upcase)
|
||||
|
||||
# It is valid
|
||||
true
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user