auto replace rules in titles

This commit is contained in:
Régis Hanol
2013-04-10 11:00:50 +02:00
parent 33e3ad1603
commit c5cf8be864
14 changed files with 280 additions and 91 deletions

45
lib/text_cleaner.rb Normal file
View File

@@ -0,0 +1,45 @@
#
# Clean up a text
#
# Stateless helpers that tidy up a piece of text (typically a topic title)
# according to a set of boolean options. Nothing here mutates its input.
#
class TextCleaner

  # Options applied when cleaning a title.
  #
  # cf. http://meta.discourse.org/t/should-we-have-auto-replace-rules-in-titles/5687
  #
  # The "prettify" rules are all driven by SiteSetting.title_prettify; space
  # collapsing and stripping are always enabled.
  #
  # @return [Hash] option name (Symbol) => truthy/falsy flag, as understood by .clean
  def self.title_options
    prettify = SiteSetting.title_prettify
    {
      deduplicate_exclamation_marks: prettify,
      deduplicate_question_marks: prettify,
      replace_all_upper_case: prettify,
      capitalize_first_letter: prettify,
      remove_unnecessary_period: prettify,
      # Only applied for the "en" locale.
      # NOTE(review): presumably because some languages put a space before "!" / "?" -- confirm.
      remove_extraneous_space: prettify && SiteSetting.default_locale == "en",
      fixes_interior_spaces: true,
      strip_whitespaces: true
    }
  end

  # Cleans a title using the site-wide title options.
  #
  # @param title [String] the raw title
  # @return [String] a cleaned copy of +title+
  def self.clean_title(title)
    TextCleaner.clean(title, TextCleaner.title_options)
  end

  # Applies the enabled clean-up rules to +text+, in a fixed order.
  #
  # @param text [String] the text to clean; it is never modified
  # @param opts [Hash] flags selecting which rules run:
  #   :deduplicate_exclamation_marks, :deduplicate_question_marks,
  #   :replace_all_upper_case, :capitalize_first_letter,
  #   :remove_unnecessary_period, :remove_extraneous_space,
  #   :fixes_interior_spaces, :strip_whitespaces
  # @return [String] a new, cleaned string
  def self.clean(text, opts = {})
    # Work on a copy: the bang methods below would otherwise alter the
    # caller's string in place and raise on frozen strings.
    text = text.dup
    # Replace !!!!! with a single !
    text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]
    # Replace ????? with a single ?
    text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
    # Replace all-caps text with regular case letters
    # (only when there is at least one ASCII capital and no lower-case letter at all)
    text.tr!('A-Z', 'a-z') if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
    # Capitalize first letter; leading whitespace is skipped because
    # stripping only happens at the very end
    text.sub!(/\A\s*[a-z]/) { |lead| lead.upcase } if opts[:capitalize_first_letter]
    # Remove unnecessary period at the end (a lone ".", so "..." is kept)
    text.sub!(/([^.])\.(\s*)\z/, '\1\2') if opts[:remove_unnecessary_period]
    # Remove extraneous space before the end punctuation
    text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space]
    # Fixes interior spaces
    text.gsub!(/ +/, ' ') if opts[:fixes_interior_spaces]
    # Strip whitespaces
    text.strip! if opts[:strip_whitespaces]
    text
  end

end

View File

@@ -1,31 +1,27 @@
#
# Given a string, tell us whether or not it is acceptable. Also, remove stuff we don't like
# such as leading / trailing space.
# Given a string, tell us whether or not it is acceptable.
#
class TextSentinel
attr_accessor :text
# Builds a sentinel around +text+.
#
# text - the string to inspect; may be nil or blank
# opts - optional Hash of settings (nil is treated as an empty Hash)
def initialize(text, opts=nil)
# Fall back to an empty Hash so later @opts[...] lookups are safe.
@opts = opts || {}
# Re-encode to UTF-8, asking encode to replace invalid/undefined sequences with ''.
# @text is only assigned when text is present, so it stays nil for blank input.
# NOTE(review): present? is not core Ruby -- presumably ActiveSupport; confirm it is loaded.
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') if text.present?
end
# Returns a Regexp matching a single character from these ASCII ranges:
# space through "/", "[" through "`", ":" through "@", and "{" through "~"
# (i.e. the space character and ASCII punctuation).
# The /m flag only changes how "." matches, and the pattern contains no ".".
def self.non_symbols_regexp
/[\ -\/\[-\`\:-\@\{-\~]/m
end
def initialize(text, opts=nil)
@opts = opts || {}
if text.present?
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
@text.strip! if @opts[:strip]
end
# Builds a TextSentinel for +text+ whose only option is :min_entropy,
# taken from SiteSetting.body_min_entropy.
def self.body_sentinel(text)
TextSentinel.new(text, min_entropy: SiteSetting.body_min_entropy)
end
def self.title_sentinel(text)
TextSentinel.new(text,
min_entropy: SiteSetting.title_min_entropy,
max_word_length: SiteSetting.max_word_length,
remove_interior_spaces: true,
strip: true)
max_word_length: SiteSetting.max_word_length)
end
# Entropy is a number of how many unique characters the string needs.
@@ -35,7 +31,6 @@ class TextSentinel
end
def valid?
# Blank strings are not valid
return false if @text.blank? || @text.strip.blank?
@@ -47,12 +42,12 @@ class TextSentinel
return false if non_symbols == 0
# Don't allow super long strings without spaces
return false if @opts[:max_word_length] && @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/
# We don't allow all upper case content in english
return false if (@text =~ /[A-Z]+/) && (@text == @text.upcase)
# It is valid
true
end