markdown: parse emails and phone numbers (#365)

* markdown: parse emails and phone numbers

* phone parsing

* refactor
This commit is contained in:
Evgeny Poberezkin
2022-02-24 07:55:18 +00:00
committed by GitHub
parent b7a06dd0cf
commit 51f5982205
9 changed files with 105 additions and 86 deletions

View File

@@ -586,7 +586,6 @@ class FormattedText(val text: String, val format: Format? = null) {
sealed class Format {
@Serializable @SerialName("bold") class Bold: Format()
@Serializable @SerialName("italic") class Italic: Format()
@Serializable @SerialName("underline") class Underline: Format()
@Serializable @SerialName("strikeThrough") class StrikeThrough: Format()
@Serializable @SerialName("snippet") class Snippet: Format()
@Serializable @SerialName("secret") class Secret: Format()
@@ -598,7 +597,6 @@ sealed class Format {
val style: SpanStyle @Composable get() = when (this) {
is Bold -> SpanStyle(fontWeight = FontWeight.Bold)
is Italic -> SpanStyle(fontStyle = FontStyle.Italic)
is Underline -> SpanStyle(textDecoration = TextDecoration.Underline)
is StrikeThrough -> SpanStyle(textDecoration = TextDecoration.LineThrough)
is Snippet -> SpanStyle(fontFamily = FontFamily.Monospace)
is Secret -> SpanStyle(color = HighOrLowlight, background = HighOrLowlight)

View File

@@ -667,7 +667,6 @@ struct FormattedText: Decodable {
enum Format: Decodable {
case bold
case italic
case underline
case strikeThrough
case snippet
case secret

View File

@@ -23,6 +23,7 @@ dependencies:
- containers == 0.6.*
- cryptonite >= 0.27 && < 0.30
- directory == 1.3.*
- email-validate == 2.3.*
- exceptions == 0.10.*
- filepath == 1.4.*
- mtl == 2.2.*

View File

@@ -57,6 +57,7 @@ library
, containers ==0.6.*
, cryptonite >=0.27 && <0.30
, directory ==1.3.*
, email-validate ==2.3.*
, exceptions ==0.10.*
, filepath ==1.4.*
, mtl ==2.2.*
@@ -92,6 +93,7 @@ executable simplex-chat
, containers ==0.6.*
, cryptonite >=0.27 && <0.30
, directory ==1.3.*
, email-validate ==2.3.*
, exceptions ==0.10.*
, filepath ==1.4.*
, mtl ==2.2.*
@@ -134,6 +136,7 @@ test-suite simplex-chat-test
, containers ==0.6.*
, cryptonite >=0.27 && <0.30
, directory ==1.3.*
, email-validate ==2.3.*
, exceptions ==0.10.*
, filepath ==1.4.*
, hspec ==2.7.*

View File

@@ -152,7 +152,6 @@ markdownInfo =
[ green "Markdown:",
indent <> highlight "*bold* " <> " - " <> markdown Bold "bold text",
indent <> highlight "_italic_ " <> " - " <> markdown Italic "italic text" <> " (shown as underlined)",
indent <> highlight "+underlined+ " <> " - " <> markdown Underline "underlined text",
indent <> highlight "~strikethrough~" <> " - " <> markdown StrikeThrough "strikethrough text" <> " (shown as inverse)",
indent <> highlight "`code snippet` " <> " - " <> markdown Snippet "a + b // no *markdown* here",
indent <> highlight "!1 text! " <> " - " <> markdown (colored Red) "red text" <> " (1-6: red, green, blue, yellow, cyan, magenta)",

View File

@@ -4,23 +4,23 @@
module Simplex.Chat.Markdown where
import Control.Applicative ((<|>))
import Control.Applicative (optional, (<|>))
import Data.Aeson (ToJSON)
import qualified Data.Aeson as J
import Data.Attoparsec.Text (Parser)
import qualified Data.Attoparsec.Text as A
import Data.Bifunctor (second)
import Data.Char (isDigit)
import Data.Either (fromRight)
import Data.Functor (($>))
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M
import Data.Maybe (isNothing)
import Data.Maybe (fromMaybe, isNothing)
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import Data.Text.Encoding (encodeUtf8)
import GHC.Generics
import Simplex.Messaging.Parsers (fstToLower, sumTypeJSON)
import System.Console.ANSI.Types
import qualified Text.Email.Validate as Email
data Markdown = Markdown (Maybe Format) Text | Markdown :|: Markdown
deriving (Eq, Show)
@@ -28,7 +28,6 @@ data Markdown = Markdown (Maybe Format) Text | Markdown :|: Markdown
data Format
= Bold
| Italic
| Underline
| StrikeThrough
| Snippet
| Secret
@@ -89,47 +88,6 @@ type MarkdownList = [FormattedText]
unmarked :: Text -> Markdown
unmarked = Markdown Nothing
colorMD :: Char
colorMD = '!'
secretMD :: Char
secretMD = '#'
formats :: Map Char Format
formats =
M.fromList
[ ('*', Bold),
('_', Italic),
('+', Underline),
('~', StrikeThrough),
('`', Snippet),
(secretMD, Secret),
(colorMD, colored White)
]
colors :: Map Text FormatColor
colors =
M.fromList . map (second FormatColor) $
[ ("red", Red),
("green", Green),
("blue", Blue),
("yellow", Yellow),
("cyan", Cyan),
("magenta", Magenta),
("r", Red),
("g", Green),
("b", Blue),
("y", Yellow),
("c", Cyan),
("m", Magenta),
("1", Red),
("2", Green),
("3", Blue),
("4", Yellow),
("5", Cyan),
("6", Magenta)
]
parseMaybeMarkdownList :: Text -> Maybe MarkdownList
parseMaybeMarkdownList s =
let m = markdownToList $ parseMarkdown s
@@ -151,52 +109,76 @@ markdownP = mconcat <$> A.many' fragmentP
fragmentP :: Parser Markdown
fragmentP =
A.peekChar >>= \case
Just ' ' -> unmarked <$> A.takeWhile (== ' ')
Just c -> case M.lookup c formats of
Just Secret -> A.char secretMD *> secretP
Just (Colored (FormatColor White)) -> A.char colorMD *> coloredP
Just f -> A.char c *> formattedP c "" f
Nothing -> wordsP
Just c -> case c of
' ' -> unmarked <$> A.takeWhile (== ' ')
'+' -> phoneP <|> wordP
'*' -> formattedP '*' Bold
'_' -> formattedP '_' Italic
'~' -> formattedP '~' StrikeThrough
'`' -> formattedP '`' Snippet
'#' -> A.char '#' *> secretP
'!' -> coloredP <|> wordP
_
| isDigit c -> phoneP <|> wordP
| otherwise -> wordP
Nothing -> fail ""
formattedP :: Char -> Text -> Format -> Parser Markdown
formattedP c p f = do
s <- A.takeTill (== c)
(A.char c $> md c p f s) <|> noFormat (c `T.cons` p <> s)
md :: Char -> Text -> Format -> Text -> Markdown
md c p f s
formattedP :: Char -> Format -> Parser Markdown
formattedP c f = do
s <- A.char c *> A.takeTill (== c)
(A.char c $> md c f s) <|> noFormat (c `T.cons` s)
md :: Char -> Format -> Text -> Markdown
md c f s
| T.null s || T.head s == ' ' || T.last s == ' ' =
unmarked $ c `T.cons` p <> s `T.snoc` c
unmarked $ c `T.cons` s `T.snoc` c
| otherwise = markdown f s
secretP :: Parser Markdown
secretP = secret <$> A.takeWhile (== secretMD) <*> A.takeTill (== secretMD) <*> A.takeWhile (== secretMD)
secretP = secret <$> A.takeWhile (== '#') <*> A.takeTill (== '#') <*> A.takeWhile (== '#')
secret :: Text -> Text -> Text -> Markdown
secret b s a
| T.null a || T.null s || T.head s == ' ' || T.last s == ' ' =
unmarked $ secretMD `T.cons` ss
unmarked $ '#' `T.cons` ss
| otherwise = markdown Secret $ T.init ss
where
ss = b <> s <> a
coloredP :: Parser Markdown
coloredP = do
cStr <- A.takeWhile (\c -> c /= ' ' && c /= colorMD)
case M.lookup cStr colors of
Just c ->
let f = Colored c
in (A.char ' ' *> formattedP colorMD (cStr `T.snoc` ' ') f)
<|> noFormat (colorMD `T.cons` cStr)
_ -> noFormat (colorMD `T.cons` cStr)
wordsP :: Parser Markdown
wordsP = do
word <- wordMD <$> A.takeTill (== ' ')
s <- (word <>) <$> (unmarked <$> A.takeWhile (== ' '))
A.peekChar >>= \case
Nothing -> pure s
Just c -> case M.lookup c formats of
Just _ -> pure s
Nothing -> (s <>) <$> wordsP
clr <- A.char '!' *> colorP <* A.space
s <- ((<>) <$> A.takeWhile1 (\c -> c /= ' ' && c /= '!') <*> A.takeTill (== '!')) <* A.char '!'
if T.null s || T.last s == ' '
then fail "not colored"
else pure $ markdown (colored clr) s
colorP =
A.anyChar >>= \case
'r' -> "ed" $> Red <|> pure Red
'g' -> "reen" $> Green <|> pure Green
'b' -> "lue" $> Blue <|> pure Blue
'y' -> "ellow" $> Yellow <|> pure Yellow
'c' -> "yan" $> Cyan <|> pure Cyan
'm' -> "agenta" $> Magenta <|> pure Magenta
'1' -> pure Red
'2' -> pure Green
'3' -> pure Blue
'4' -> pure Yellow
'5' -> pure Cyan
'6' -> pure Magenta
_ -> fail "not color"
phoneP = do
country <- optional $ T.cons <$> A.char '+' <*> A.takeWhile1 isDigit
code <- optional $ conc4 <$> phoneSep <*> "(" <*> A.takeWhile1 isDigit <*> ")"
segments <- mconcat <$> A.many' ((<>) <$> phoneSep <*> A.takeWhile1 isDigit)
let s = fromMaybe "" country <> fromMaybe "" code <> segments
len = T.length s
if 7 <= len && len <= 22 then pure $ markdown Phone s else fail "not phone"
conc4 s1 s2 s3 s4 = s1 <> s2 <> s3 <> s4
phoneSep = " " <|> "-" <|> "." <|> ""
wordP :: Parser Markdown
wordP = wordMD <$> A.takeTill (== ' ')
wordMD :: Text -> Markdown
wordMD s
| "http://" `T.isPrefixOf` s || "https://" `T.isPrefixOf` s || "simplex:/" `T.isPrefixOf` s = markdown Uri s
| T.null s = unmarked s
| isUri s = markdown Uri s
| isEmail s = markdown Email s
| otherwise = unmarked s
noFormat :: Text -> Parser Markdown
isUri s = "http://" `T.isPrefixOf` s || "https://" `T.isPrefixOf` s || "simplex:/" `T.isPrefixOf` s
isEmail s = T.any (== '@') s && Email.isValid (encodeUtf8 s)
noFormat = pure . unmarked

View File

@@ -70,7 +70,6 @@ sgr :: Format -> [SGR]
sgr = \case
Bold -> [SetConsoleIntensity BoldIntensity]
Italic -> [SetUnderlining SingleUnderline, SetItalicized True]
Underline -> [SetUnderlining SingleUnderline]
StrikeThrough -> [SetSwapForegroundBackground True]
Colored (FormatColor c) -> [SetColor Foreground Vivid c]
Secret -> [SetColor Foreground Dull Black, SetColor Background Dull Black]

View File

@@ -14,6 +14,8 @@ markdownTests = do
secretText
textColor
textWithUri
textWithEmail
textWithPhone
textFormat :: Spec
textFormat = describe "text format (bold)" do
@@ -141,3 +143,40 @@ textWithUri = describe "text with Uri" do
it "ignored as markdown" do
parseMarkdown "_https://simplex.chat" `shouldBe` "_https://simplex.chat"
parseMarkdown "this is _https://simplex.chat" `shouldBe` "this is _https://simplex.chat"
email :: Text -> Markdown
email = Markdown $ Just Email
textWithEmail :: Spec
textWithEmail = describe "text with Email" do
it "correct markdown" do
parseMarkdown "chat@simplex.chat" `shouldBe` email "chat@simplex.chat"
parseMarkdown "test chat@simplex.chat" `shouldBe` "test " <> email "chat@simplex.chat"
parseMarkdown "test chat+123@simplex.chat" `shouldBe` "test " <> email "chat+123@simplex.chat"
parseMarkdown "test chat.chat+123@simplex.chat" `shouldBe` "test " <> email "chat.chat+123@simplex.chat"
parseMarkdown "chat@simplex.chat test" `shouldBe` email "chat@simplex.chat" <> " test"
parseMarkdown "test1 chat@simplex.chat test2" `shouldBe` "test1 " <> email "chat@simplex.chat" <> " test2"
it "ignored as markdown" do
parseMarkdown "chat @simplex.chat" `shouldBe` "chat @simplex.chat"
parseMarkdown "this is chat @simplex.chat" `shouldBe` "this is chat @simplex.chat"
phone :: Text -> Markdown
phone = Markdown $ Just Phone
textWithPhone :: Spec
textWithPhone = describe "text with Phone" do
it "correct markdown" do
parseMarkdown "07777777777" `shouldBe` phone "07777777777"
parseMarkdown "test 07777777777" `shouldBe` "test " <> phone "07777777777"
parseMarkdown "07777777777 test" `shouldBe` phone "07777777777" <> " test"
parseMarkdown "test1 07777777777 test2" `shouldBe` "test1 " <> phone "07777777777" <> " test2"
parseMarkdown "test 07777 777 777 test" `shouldBe` "test " <> phone "07777 777 777" <> " test"
parseMarkdown "test +447777777777 test" `shouldBe` "test " <> phone "+447777777777" <> " test"
parseMarkdown "test +44 (0) 7777 777 777 test" `shouldBe` "test " <> phone "+44 (0) 7777 777 777" <> " test"
parseMarkdown "test +44-7777-777-777 test" `shouldBe` "test " <> phone "+44-7777-777-777" <> " test"
parseMarkdown "test +44 (0) 7777.777.777 https://simplex.chat test"
`shouldBe` "test " <> phone "+44 (0) 7777.777.777" <> " " <> uri "https://simplex.chat" <> " test"
it "ignored as markdown (too short)" $
parseMarkdown "test 077777 test" `shouldBe` "test 077777 test"
it "ignored as markdown (double spaces)" $
parseMarkdown "test 07777 777 777 test" `shouldBe` "test 07777 777 777 test"

View File

@@ -1,5 +1,4 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE NamedFieldPuns #-}
module MobileTests where