Source: Sublime · Severity: medium · Type: Rule

Fake voicemail notification (untrusted sender)

This rule detects a common credential phishing vector that entices the user to engage with links under the premise that they have a voicemail to retrieve. The rule looks for voicemail verbiage in the display name, body, subject, or a combination of those elements, together with emojis or a medium-to-high confidence credential theft NLU intent, from a first-time and unsolicited sender.

MITRE ATT&CK

T1566 T1566.001 T1566.002 T1598

initial-access

Detection Query

// only inbound messages are evaluated
type.inbound
// contains links or attachments
// - either 1 to 25 body links, or 1 to 3 distinct attachments (de-duplicated by md5)
// - and in every case no more than 8 distinct attachments
and (
  (0 < length(body.links) <= 25 or 0 < length(distinct(attachments, .md5)) <= 3)
  and 0 <= length(distinct(attachments, .md5)) <= 8
)

// the subject or display_name need some keywords which are voicemail related
// every pattern below is tested case-insensitively against both the subject and the sender display name;
// a single pattern matching either field is enough
and (
  any([subject.subject, sender.display_name],
      regex.icontains(.,
                      // split phrases that occur within 3 words between or only punctuation between them
                      '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
                      // regex specific to v-mail, v_msg, v,mail, etc
                      // list of "secondary" words synced with regex above this one
                      'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
                      // split phrases that start with "caller" that occur within 3 words between or only punctuation
                      'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
                      // strong phrases
                      '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
                      // starts in the format of `(4)` and contains some voicemail keywords
                      // (only a single digit is accepted inside the parentheses)
                      '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
                      // "call"/"caller" followed within 3 words by playback or transcript
                      'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',

                      // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
                      // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
                      '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
                      // obfuscated phone number with at least one digit in the prefix
                      // XXX-555-5555, XXX-5XX-XXXX
                      '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
                      // obfuscated voicemail/voicemessage keywords
                      'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
                      'v[o0][il1]cem[\*X\.\-_]{2,}',
                      // "X new voice..." patterns
                      '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
                      // sent-message patterns
                      '(?:sent|new|incoming)[\s\-]+message.*(v[o0][il1]ce|<.*@.*>)',
      )
  )
  // body.current_thread.text inspection should be very specific to avoid FP
  // the text is passed through strings.replace_confusables before matching, and the
  // [o0] / [il1] classes additionally cover digit-for-letter substitutions
  or regex.icontains(strings.replace_confusables(body.current_thread.text),
                     // body.current_thread.text,
                     'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
                     '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
                     'v[o0][il1]cema[il1][li1] (is )?attached',
                     'an? (?:new )?encrypted v[o0][il1]cemail',
                     'a (?:new )?pending message',
                     'Your? have (?: an?)?incoming v[o0][il1]ceRec',
                     "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
                     'New V[o0][il1]cema[il1][li1] Received',
                     'New m[il1]ssed ca[li1][li1] record',
                     '\bv[o0][il1]cema[il1][li1] transcript\b',
                     'Listen to V[o0][il1]ceMa[il1][li1]',
                     'New v[o0][il1]cema[il1][li1] from',
                     'v[o0][il1]ce note'
  )
  // pull out two regexes that could benefit from negations
  // positive: "you/we have|received ... voicemail" or "left you a ... voicemail/audio"
  // negative: first-person phrasing that a real human sender would use
  or (
    regex.icontains(body.current_thread.text,
                    '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
                    'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
    )
    // negate personal messages such as "I just left you a voicemail" or
    // "just called you at <number> and left you a voicemail".
    // The first-person prefix is mandatory: previously the whole prefix group was optional
    // (a misplaced parenthesis), so this negation matched every " left you a ... voice..."
    // phrase and cancelled the "left you a" pattern above.
    and not regex.icontains(body.current_thread.text,
                            '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+ and)?) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
    )
    // negate "you have/received my voicemail" style replies
    and not regex.icontains(body.current_thread.text,
                            'you (?:have |received )my voice\s?(?:mail|audio|message)'
    )
  )
  // Reuse the body.current_thread.text logic against the OCR output of the message screenshot
  // Only applies when the message carries attachments that are all images, or a mix of
  // images and attachments that claim an audio content type without being wav/mp3.
  or (
    length(attachments) > 0
    and (
      all(attachments,
          .file_type in $file_types_images
          and beta.parse_exif(.).image_height != 1
      )
      // there is a mix of fake audio attachments and images
      or (
        length(filter(attachments,
                      strings.starts_with(.content_type, "audio")
                      // confirm the content type with .file_type
                      // we have seen attachments claim to be audio/* files, only to be exploded as something else
                      and not .file_type in ("wav", "mp3")
               )
        )
        // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
        // meaning, all attachments that are NOT fake audio attachments MUST be images
        + length(filter(attachments,
                        .file_type in $file_types_images
                        and beta.parse_exif(.).image_height != 1
                 )
        ) == length(attachments)
      )
    )
    // only the top-level (depth 0) result of the exploded screenshot is inspected
    and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
            regex.icontains(.scan.ocr.raw,
                            // same phrase list as the body.current_thread.text check
                            'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
                            '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
                            'v[o0][il1]cema[il1][li1] (is )?attached',
                            'an? (?:new )?encrypted v[o0][il1]cemail',
                            'a (?:new )?pending message',
                            'Your? have (?: an?)?incoming v[o0][il1]ceRec',
                            "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
                            'New V[o0][il1]cema[il1][li1] Received',
                            'New m[il1]ssed ca[li1][li1] record',
                            '\bv[o0][il1]cema[il1][li1] transcript\b',
                            'Listen to V[o0][il1]ceMa[il1][li1]',
                            'New v[o0][il1]cema[il1][li1] from',
                            'v[o0][il1]ce note'
            )
            or (
              regex.icontains(.scan.ocr.raw,
                              '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
                              // kept in sync with the body.current_thread.text version of this pattern:
                              // the i/l/1 and o/0 character classes cover look-alike substitutions in the OCR text too
                              'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
              )
              // NOTE(review): the negations inspect body.current_thread.text rather than the OCR text —
              // confirm this is intentional
              and not regex.icontains(body.current_thread.text,
                                      '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
              )
              and not regex.icontains(body.current_thread.text,
                                      'you (?:have |received )my voice\s?(?:mail|audio|message)'
              )
            )
    )
  )
  // literal markers in the raw HTML body
  or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
  or strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
)
// at least two of the following independent signals must be present
and 2 of (
  (
    // the sender is a freemail
    sender.email.domain.root_domain in $free_email_providers
  ),
  // NLU reports a cred_theft intent with medium or high confidence,
  // either in the body text or in the OCR of the message screenshot
  (
    any(ml.nlu_classifier(body.current_thread.text).intents,
        .name in ("cred_theft") and .confidence in ("medium", "high")
    )
    or 
    // use the OCR from the message screenshot
    any(filter(file.explode(file.message_screenshot()), .depth == 0),
        any(ml.nlu_classifier(.scan.ocr.raw).intents,
            .name in ("cred_theft") and .confidence in ("medium", "high")
        )
    )
  ),
  // an HTML/text attachment whose rendered screenshot contains a Microsoft logo
  // (medium or high confidence)
  (
    any(attachments,
        .content_type in ("html", "text", "text/html")
        and any(ml.logo_detect(file.html_screenshot(.)).brands,
                .name in ("Microsoft") and .confidence in ("medium", "high")
        )
    )
  ),
  // the sender display name pairs voicemail-style words, or mentions transcription / Caller ID
  (
    regex.icontains(sender.display_name,
                    '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
    )
  ),
  // attachment names are often HTML and voice mail related
  // applies to HTML/text/SVG/unknown/PDF attachments whose file name contains a voicemail
  // keyword, a duration, or an emoji, or whose name is only an extension or missing
  (
    any(attachments,
        // this logic is reused below for eml attachments
        // ensure updates occur both places
        (
          .content_type in ("html", "text", "text/html")
          or .file_type in ("html", "unknown", "svg")
          or .file_type == "pdf"
        )
        and (
          regex.icontains(.file_name,
                          '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
                          // contains a time
                          // 01min , 60secs
                          '0?[1-9]\s*min(?:(?:ute)?s)?',
                          '\d{1,2}\s*s(?:ec(?:ond)?s)?',
                          // (00:50s)
                          // 3:26 seconds
                          '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                          // 03min25secs
                          '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                          // [0:39]
                          // (0:39)
                          '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
                          // contains an emoji
                          '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
          )
          // sometimes there is no name, it's just the extension which is also strange
          or .file_name in~ (".htm", ".html")
          // or sometimes it has no name....
          or .file_name is null
        )
    )
  ),
  // attachment contains javascript
  // either a small (< 1500 bytes) attachment with at least one script found when exploded,
  // or an attachment of any size whose parsed text contains script/redirect-related strings
  (
    any(attachments,
        (
          .content_type in ("html", "text", "text/html")
          or .file_type in ("html", "unknown", "svg")
        )
        and (
          (
            .size < 1500
            and any(file.explode(.), length(.scan.html.scripts) > 0)
          )
          // bypass the size requirement under these conditions
          or (
            // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
            strings.ilike(file.parse_text(.,
                                          encodings=[
                                            "ascii",
                                            "utf8",
                                            "utf16-le"
                                          ]
                          ).text,
                          "*onload*",
                          "*window.location.href*",
                          "*onerror*",
                          "*CDATA*",
                          "*<script*",
                          "*</script*",
                          "*atob*",
                          "*location.assign*",
                          "*decodeURIComponent*"
            )
          )
        )
    )
  ),
  // the attachment is personalised for the recipient
  (
    any(attachments,
        (
          .content_type in ("html", "text", "text/html")
          or .file_type in ("html", "unknown", "svg")
        )
        and any(recipients.to,
                // the html attachment contains a recipient email address
                strings.contains(file.parse_html(..).raw, .email.email)
                // the sld of the domain is in the attachment name
                or strings.contains(..file_name, .email.domain.sld)
        )
    )
  ),
  // eml attachments
  // an attached message (message/rfc822 or .eml) that itself carries an attachment
  // matching the attachment-name logic used for top-level attachments
  (
    any(filter(attachments,
               .content_type == "message/rfc822" or .file_extension in ('eml')
        ),
        // which contain attachments
        // this is the same logic as above
        any(file.parse_eml(.).attachments,
            (
              .content_type in ("html", "text", "text/html")
              or .file_type in ("html", "unknown", "svg")
              or .file_type == "pdf"
            )
            and (
              regex.icontains(.file_name,
                              '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
                              // contains a time
                              // 01min , 60secs
                              '0?[1-9]\s*min(?:(?:ute)?s)?',
                              '\d{1,2}\s*s(?:ec(?:ond)?s)?',
                              // (00:50s)
                              // 3:26 seconds
                              '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                              // 03min25secs
                              '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                              // [0:39]
                              // (0:39)
                              '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
                              // contains an emoji
                              '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
              )
              // sometimes there is no name, it's just the extension which is also strange
              or .file_name in~ (".htm", ".html")
              // or sometimes it has no name....
              or .file_name is null
            )
        )
    )
  ),
  // attached eml sender/recipient/subject are all the same as the outer
  // and have an attachment or body links
  (
    any(filter(attachments,
               .content_type == "message/rfc822" or .file_extension in ('eml')
        ),
        // the attached message has the same subject and sender as the outer message
        file.parse_eml(.).subject.subject == subject.subject
        and file.parse_eml(.).sender.email.email == sender.email.email
        // and the same number of "to" recipients, each outer recipient appearing in the attached message
        and (
          length(file.parse_eml(.).recipients.to) == length(recipients.to)
          and all(recipients.to,
                  .email.email in map(file.parse_eml(..).recipients.to,
                                      .email.email
                  )
          )
        )
        and (
          // there are attachments
          length(file.parse_eml(.).attachments) > 0
          // or body links (links to org domains are not counted)
          or length(filter(file.parse_eml(.).body.links,
                           .href_url.domain.domain not in $org_domains
                           and .href_url.domain.root_domain not in $org_domains
                    )
          ) > 0
        )
    )
  ),
  // the body links contain the recipients email
  // Only recipients with a non-empty address (or a valid domain) are considered. The same
  // filter is applied to the list that is matched against each link, so a blank recipient
  // entry cannot produce an empty-substring match.
  (
    length(filter(recipients.to, .email.email != "" or .email.domain.valid)) > 0
    and any(body.links,
            any(filter(recipients.to, .email.email != "" or .email.domain.valid),
                // the link URL embeds the full address or just the local part
                strings.icontains(..href_url.url, .email.email)
                or strings.icontains(..href_url.url, .email.local_part)
            )
    )
  ),
  // a short body (under 700 characters) containing keywords written with accented characters
  (
    length(body.current_thread.text) < 700
    and regex.icontains(body.current_thread.text,
                        'Méssãge|Méssage|Recéived|Addréss'
    )
  ),
  (
    // sender domain matches no body domains
    // only inspect "links" that have a display_text and display_url is null to remove "plain text" email address from being caught
    // at least one such link must exist ...
    length(filter(body.links,
                  .display_text is not null
                  and .display_url.url is null
                  and .href_url.domain.valid
           )
    ) > 0
    // ... and every one of them must point away from the sender's domain, the org's domains,
    // and the listed allowed domains
    and all(filter(body.links,
                   .display_text is not null
                   and .display_url.url is null
                   and .href_url.domain.valid
            ),
            .href_url.domain.root_domain != sender.email.domain.root_domain
            and .href_url.domain.root_domain not in $org_domains
            and .href_url.domain.root_domain not in ("aka.ms")
            and .href_url.domain.root_domain not in (
              "unitelvoice.com",
              "googleapis.com",
              "dialmycalls.com",
              "ringcentral.biz",
              "google.com"
            )
    )
  ),
  // the body links contain vm related phrases
  (
    any(body.links,
        // the display text contains at least one uppercase letter (case-sensitive check)
        regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
        and regex.icontains(.display_text,
                            '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
        )
        // negate FP terms in link display texts
        and not strings.icontains(.display_text, 'voice call center')
    )
  ),
  // same display-text phrases, for links whose URL path is exactly "/ctt"
  (
    any(body.links,
        .href_url.path == "/ctt"
        and regex.icontains(.display_text,
                            '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
        )
        // negate FP terms in link display texts
        and not strings.icontains(.display_text, 'voice call center')
    )
  ),
  // new domains
  // a linked domain whose WHOIS record is less than 10 days old (unsubscribe links are ignored)
  (
    any(body.links,
        network.whois(.href_url.domain).days_old < 10
        and not strings.icontains(.href_url.path, "unsubscribe")
    )
  ),
  // sld use in sender/subject elements
  (
    any(recipients.to,
        // recipient's SLD is in the sender's display name
        strings.icontains(sender.display_name, .email.domain.sld)
        // recipient's SLD is in the subject
        or strings.icontains(subject.subject, .email.domain.sld)
        // recipient's SLD is in the senders local_part
        or strings.icontains(sender.email.local_part, .email.domain.sld)
    )
  ),
  // often times the subject or sender display name will contain time references
  // (the body text is inspected as well)
  (
    any([sender.display_name, subject.subject, body.current_thread.text],
        regex.icontains(.,
                        // 01min , 60secs
                        '0?[1-9]\s*min(?:(?:ute)?s)?\b',
                        '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
                        // (00:50s)
                        // 3:26 seconds
                        '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                        // 03min25secs
                        '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                        // [0:39]
                        // (0:39)
                        '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
        )
    )
    // reuse the same logic against OCR output of message_screenshot
    or any(filter(file.explode(file.message_screenshot()), .depth == 0),
           regex.icontains(.scan.ocr.raw,
                           // 01min , 60secs
                           '0?[1-9]\s*min(?:(?:ute)?s)?\b',
                           '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
                           // (00:50s)
                           // 3:26 seconds
                           '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                           // 03min25secs
                           '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                           // [0:39]
                           // (0:39)
                           '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
           )
    )
  ),
  // often times the subject or sender display name will contain dates
  // matches day names, month names, common numeric date formats, or clock times
  (
    any([sender.display_name, subject.subject],
        // days of week
        any([
              'monday',
              'tuesday',
              'wednesday',
              'thursday',
              'friday',
              'saturday',
              'sunday'
            ],
            strings.icontains(.., .)
        )
        // months
        // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
        or any([
                 "January",
                 "February",
                 "March",
                 "April",
                 "June",
                 "July",
                 "August",
                 "September",
                 "October",
                 "November",
                 "December"
               ],
               strings.icontains(.., .)
        )
        // use a regex for May
        // NOTE(review): this check is case-insensitive, while the OCR variant below uses a
        // case-sensitive '\bMay\b' — confirm which is intended
        or regex.icontains(., '\bmay\b')
        // common date formats
        or regex.contains(.,
                          // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
                          '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
                          // MM/DD/YYYY or MM/DD/YY (US format)
                          '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
                          // DD/MM/YYYY or DD/MM/YY (European format)
                          '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
                          // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
                          '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
        )
        // common time formats
        or regex.contains(.,
                          // Example: 23:45, 08:30
                          '([01]\d|2[0-3]):([0-5]\d)',
                          // Example: 23:45:59, 08:30:12
                          '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
                          // Example: 08:30 AM, 12:45 pm
                          '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
                          // Example: 08:30:12 AM, 12:45:59 pm
                          '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
        )
    )
    // or use the OCR results from file.message_screenshot
    or any(filter(file.explode(file.message_screenshot()), .depth == 0),
           // days of week
           any([
                 'monday',
                 'tuesday',
                 'wednesday',
                 'thursday',
                 'friday',
                 'saturday',
                 'sunday'
               ],
               strings.icontains(..scan.ocr.raw, .)
           )
           // months
           // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
           or any([
                    "January",
                    "February",
                    "March",
                    "April",
                    "June",
                    "July",
                    "August",
                    "September",
                    "October",
                    "November",
                    "December"
                  ],
                  strings.icontains(..scan.ocr.raw, .)
           )
           // use a regex for May (case-sensitive here)
           or regex.contains(.scan.ocr.raw, '\bMay\b')
           // common date formats
           or regex.contains(.scan.ocr.raw,
                             // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
                             '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
                             // MM/DD/YYYY or MM/DD/YY (US format)
                             '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
                             // DD/MM/YYYY or DD/MM/YY (European format)
                             '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
                             // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
                             '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
           )
           // common time formats
           or regex.contains(.scan.ocr.raw,
                             // Example: 23:45, 08:30
                             '([01]\d|2[0-3]):([0-5]\d)',
                             // Example: 23:45:59, 08:30:12
                             '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
                             // Example: 08:30 AM, 12:45 pm
                             '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
                             // Example: 08:30:12 AM, 12:45:59 pm
                             '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
           )
    )
  ),
  // there are often emoji in the sender display name
  // (the subject is inspected as well)
  (
    any([sender.display_name, subject.subject],
        // contains an emoji
        regex.contains(.,
                       '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
        )
        // negate where the emoji occur in tags
        // i.e. inside a leading run of [bracketed] prefixes at the start of the string
        and not regex.contains(.,
                               '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
        )
    )
  ),
  // an attachment is a pdf, image, or document that contains a url
  // the url is looked for in QR code scan results of the exploded attachment;
  // only messages with one or two attachments are considered
  (
    1 <= length(attachments) <= 2
    and any(attachments,
            (
              .file_type in $file_types_images
              or .file_type == "pdf"
              or .file_extension in $file_extensions_macros
            )
            and any(file.explode(.),
                    .scan.qr.type == "url"
                    or strings.icontains(.scan.qr.data, 'http')
                    // or the QR data embeds a recipient's local part / full address
                    or any(recipients.to,
                           strings.icontains(..scan.qr.data, .email.local_part)
                           or strings.icontains(..scan.qr.data, .email.email)
                    )
            )
    )
  )
)

// negating legit replies and legitimate audio file attachments and known voicemail senders
// known voicemail/telephony sender domains (only when the sender domain is valid)
and not (
  sender.email.domain.valid
  and sender.email.domain.root_domain in (
    "magicjack.com",
    "magicjackforbusiness.com",
    "unitelvoice.com",
    "voipinterface.net",
    "ringcentral.biz",
    "verizonwireless.com",
    "t-mobile.com",
    "justcall.io",
    "airtel.com",
    "grasshopper.com",
    "ooma.com",
    "ui.com"
  )
)
// a genuine audio attachment is present
and not (
  any(attachments,
      strings.starts_with(.content_type, "audio")
      // confirm the content type with .file_type
      // we have seen attachments claim to be audio/* files, only to be exploded as something else
      and .file_type in ("wav", "mp3")
  )
)
// replies and auto-replies that carry threading headers
and not (
  (
    strings.istarts_with(subject.subject, "RE:")
    // out of office auto-reply
    // the NLU model will handle these better natively soon
    or strings.istarts_with(subject.subject, "Automatic reply:")
  )
  and (
    length(headers.references) > 0
    or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
  )
)
// bounce-back negations
// a delivery-status part nested inside an attached message
and not any(attachments,
            any(file.parse_eml(.).attachments,
                .content_type == "message/delivery-status"
            )
)
// bounce-back negations
// a top-level delivery-status attachment; text/calendar attachments are excluded here too
and not (
  any(attachments,
      .content_type in ("message/delivery-status", "text/calendar")
  )
)
// negate newsletters
// unless the body contains the phrase "this voicemail was shared by"
and not (
  (
    any(ml.nlu_classifier(body.current_thread.text).topics,
        .name in (
          "Newsletters and Digests",
          "B2B Cold Outreach",
          "Events and Webinars"
        )
        and .confidence == "high"
    )
  )
  and not strings.icontains(body.current_thread.text,
                            "this voicemail was shared by"
  )
)
// negate bouncebacks from proofpoint
and not (
  sender.display_name == "Mail Delivery Subsystem"
  and strings.ends_with(headers.message_id, "pphosted.com>")
  and any(headers.hops,
          .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
  )
  and any(attachments, .content_type == "message/rfc822")
)
// negate CheckPoint encrypted messages
and not (
  // CheckPoint banner
  length(attachments) == 1
  and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
  and strings.istarts_with(headers.message_id, "<encrypted")
  and any(headers.domains, .root_domain == "checkpointcloudsec.com")
)
// an impersonated high trust domain
and (
  (
    sender.email.domain.root_domain in $high_trust_sender_root_domains
    and not headers.auth_summary.dmarc.pass
    // service abuse
    and not sender.email.email in ("noreply-application-integration@google.com")
  )
  or sender.email.domain.root_domain not in $high_trust_sender_root_domains

  // sender profile
  or (
    (
      not sender.email.domain.root_domain in $org_domains
      and (profile.by_sender_email().prevalence not in ("common"))
      and not profile.by_sender_email().solicited
    )
    or (
      profile.by_sender_email().any_messages_malicious_or_spam
      and not profile.by_sender_email().any_messages_benign
    )
    // match if the sender is in org domains but failed auth
    or (
      sender.email.domain.domain in $org_domains
      and not coalesce(headers.auth_summary.dmarc.pass, false)
    )
    // match if the sender address is blank or null
    or (regex.match(sender.email.email, "") or sender.email.email is null)
  )
)

Data Sources

Email Messages, Email Headers, Email Attachments

Platforms

Raw Content

name: "Fake voicemail notification (untrusted sender)"
description: |
  This rule detects a common credential phishing vector enticing the user to engage with links under the premise that they have a voicemail to retrieve.
  The rule looks for voicemail verbiage in the display name, body, subject or a combination of those elements with emojis or a medium to high credential theft NLU Intent from first-time + unsolicited sender.
type: "rule"
severity: "medium"
source: |
  type.inbound
  // gate: the message must carry a payload of manageable size, i.e.
  // 1-25 body links or 1-3 distinct (by md5) attachments, and never
  // more than 8 distinct attachments overall
  and (
    (
      0 < length(body.links) <= 25
      or 0 < length(distinct(attachments, .md5)) <= 3
    )
    and 0 <= length(distinct(attachments, .md5)) <= 8
  )
  
  // the subject or display_name need some keywords which are voicemail related
  and (
    // case-insensitive search of the subject and the sender display name;
    // a hit on any one pattern in either field satisfies this operand
    any([subject.subject, sender.display_name],
        regex.icontains(.,
                        // split phrases that occur within 3 words between or only punctuation between them
                        '(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|incoming|missed(?:\sa\s)?|left( a)?|wireless)(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?)\b',
                        // regex specific to v-mail, v_msg, v,mail, etc
                        // list of "secondary" words synced with regex above this one
                        'v[[:punct:]](?:mail|message|msg|recording|received|notif|support|ca[li1][li1]\d*\b|ca[il1][il1](?:er)?|log|transcript(?:ion)?\b)',
                        // split phrases that start with "caller" that occur within 3 words between or only punctuation
                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[nm](\b|[[:punct:]])?|\bv[o0][il1]ce(?:mail|message)?|audi[o0]|missed(?:\sa\s)?|left( a)?)',
                        // strong phrases
                        '(?:open mp3|audi[o0] note|\.wav|left a vm|[^\s]+voip[^\s]*|unanswered.*ca[li1][li1]|incoming.vm|left msg|wireless ca[li1][li1]er|VM Service|v[o0][il1]ce message|missed.ca[li1][li1](?:e[rd])?|\bca[li1][li1].(?:support|service)(?: for| log)?|missed.{0,10} VM|new v[o0][il1]cemail from|new.v.m.from.\+?\d+|new v[o0][il1]cemail?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}transcript(s|ion)?|message received|new (?:message|call|voicemail).{0,15}(?:info|notification|alert)|incoming transmission|voice note)',
                        // starts in the format of `(4)` and contains some voicemail keywords
                        '^\(\d\)\s(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:message|voip|v[o0][il1]ce|unread|call)',
                        // "call"/"caller" followed within 3 words by "playback" or "transcript"
                        'ca[li1][li1](?:er)?(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:playback|transcript)',
  
                        // obfuscated phone number with at least one digit in the area code and at least one obfuscated number in the last group
                        // 555-555-555X, 555-555-XXXX, 555-5XX-XXXX
                        '\b1?\(?(\d{3}|\d{2}[\*X]|\d[\*X]{2})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})[^a-z0-9]{0,4}(\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})[^0-9]',
                        // obfuscated phone number with at least one digit in the prefix
                        // XXX-555-5555, XXX-5XX-XXXX
                        '\b1?\(?(\d{2}[\*X]|\d[\*X]{2}|[\*X]{2,3})\)?[^a-z0-9]{0,2}(\d{2,3}|\d{2}[\*X]|\d[\*X]{2})[^a-z0-9]{0,4}(\d{4}|\d{3}[\*X]|\d{2}[\*X]{2}|\d[\*X]{3}|[\*X]{3,4})\b',
                        // obfuscated voicemail/voicemessage keywords
                        // (keyword followed by two or more of * X . - _)
                        'v[o0][il1]ce[\s\-_]?m(?:ail|sg|essage)?[\*X\.\-_]{2,}',
                        'v[o0][il1]cem[\*X\.\-_]{2,}',
                        // "X new voice..." patterns
                        '\d+\s+new.*v[o0][il1]ce(?:mail|message|m[\*]+)?',
                        // sent-message patterns
                        // ("sent/new/incoming message" followed later by "voice" or an <addr@domain> token)
                        '(?:sent|new|incoming)[\s\-]+message.*(v[o0][il1]ce|<.*@.*>)',
        )
    )
    // body.current_thread.text inspection should be very specific to avoid FP.
    // The text is passed through strings.replace_confusables before matching, and a
    // case-insensitive hit on any single phrase below satisfies this operand.
    or regex.icontains(strings.replace_confusables(body.current_thread.text),
                       'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
                       '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
                       'v[o0][il1]cema[il1][li1] (is )?attached',
                       'an? (?:new )?encrypted v[o0][il1]cemail',
                       'a (?:new )?pending message',
                       // the optional article sits between "have " and "incoming"; the previous
                       // form `(?: an?)?` required a second leading space and so could never
                       // match "You have an incoming voiceRec"
                       'Your? have (?:an? )?incoming v[o0][il1]ceRec',
                       "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
                       'New V[o0][il1]cema[il1][li1] Received',
                       'New m[il1]ssed ca[li1][li1] record',
                       '\bv[o0][il1]cema[il1][li1] transcript\b',
                       'Listen to V[o0][il1]ceMa[il1][li1]',
                       'New v[o0][il1]cema[il1][li1] from',
                       'v[o0][il1]ce note'
    )
    // two looser body phrases ("you/we have|received ... voicemail", "left you a ... voicemail")
    // that are only accepted when the body does not read like a real person
    // talking about their own voicemail
    or (
      regex.icontains(body.current_thread.text,
                      '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
                      'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
      )
      // negate first-person phrasing: "I (just) left you a ..." or
      // "just called you at <number> and left you a ...".
      // The prefix group must be mandatory: when it was optional this negation matched
      // every " left you a ... voice" and cancelled the positive pattern above.
      and not regex.icontains(body.current_thread.text,
                              '(?:I(?:\sjust)?|just(?: called you at (?:\d+[[:punct:]])+) and) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:mail)?|audio)(?: message)?'
      )
      // negate "you have/received my voicemail"
      and not regex.icontains(body.current_thread.text,
                              'you (?:have |received )my voice\s?(?:mail|audio|message)'
      )
    )
    // Reuse the body.current_thread.text logic against the OCR output of the message screenshot.
    // Only evaluated when every attachment is either an image (EXIF height other than 1)
    // or a fake audio attachment (audio/* content type whose detected file type is not wav/mp3).
    or (
      length(attachments) > 0
      and (
        all(attachments,
            .file_type in $file_types_images
            and beta.parse_exif(.).image_height != 1
        )
        // there is a mix of fake audio attachments and images
        or (
          length(filter(attachments,
                        strings.starts_with(.content_type, "audio")
                        // confirm the content type with .file_type
                        // we have seen attachments claim to be audio/* files, only to be exploded as something else
                        and not .file_type in ("wav", "mp3")
                 )
          )
          // the total # of fake audio attachments + the total # of image attachments = the total # of attachments
          // meaning, all attachments that are NOT fake audio attachments MUST be images
          + length(filter(attachments,
                          .file_type in $file_types_images
                          and beta.parse_exif(.).image_height != 1
                   )
          ) == length(attachments)
        )
      )
      and any((filter(file.explode(file.message_screenshot()), .depth == 0)),
              // same phrase list as the body.current_thread.text check
              regex.icontains(.scan.ocr.raw,
                              'sent (?:from|by) (?:your )?v[o0][il1]ce (?:ma[il1][li1] )?system',
                              '(?:(?:new|this) |^)(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0]) (?:message|notification|record)',
                              'v[o0][il1]cema[il1][li1] (is )?attached',
                              'an? (?:new )?encrypted v[o0][il1]cemail',
                              'a (?:new )?pending message',
                              // the optional article sits between "have " and "incoming"; the previous
                              // form `(?: an?)?` could never match "You have an incoming voiceRec"
                              'Your? have (?:an? )?incoming v[o0][il1]ceRec',
                              "you(?:\'ve| have) a (?:new )?m[il1]ssed ca[li1][li1]",
                              'New V[o0][il1]cema[il1][li1] Received',
                              'New m[il1]ssed ca[li1][li1] record',
                              '\bv[o0][il1]cema[il1][li1] transcript\b',
                              'Listen to V[o0][il1]ceMa[il1][li1]',
                              'New v[o0][il1]cema[il1][li1] from',
                              'v[o0][il1]ce note'
              )
              or (
                regex.icontains(.scan.ocr.raw,
                                '(?:you|we) (?:have |received )+(?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:\b|\s+)v[o0][il1]ce\s?(?:ma[il1][li1]|aud[il1][o0]|message|notification)',
                                // kept in sync with the body-text pattern (homoglyph "audio", optional "notification")
                                'left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:v[o0][il1]ce(?:ma[il1][li1])?|aud[il1][o0])(?: message|notification)?',
                )
                // NOTE(review): the two negations below inspect body.current_thread.text,
                // not the OCR text matched above - confirm whether they should read .scan.ocr.raw
                and not regex.icontains(body.current_thread.text,
                                        '(?:I(?:\sjust)?|just) left you a (?:\w+(\s\w+)?|[[:punct:]]+|\s+){0,3}(?:voice(?:mail)?|audio)(?: message)?'
                )
                and not regex.icontains(body.current_thread.text,
                                        'you (?:have |received )my voice\s?(?:mail|audio|message)'
                )
              )
      )
    )
    // literal markers looked for in the raw HTML body
    or (
      strings.icontains(body.html.raw, '<!-- Voicemail phone logo')
      or strings.icontains(body.html.raw, '<title>Voicemail Notification</title>')
    )
  )
  and 2 of (
    // signal: the sender's root domain is a free email provider
    (sender.email.domain.root_domain in $free_email_providers),
    // signal: NLU reports medium/high credential-theft intent, either in the
    // current thread text or in the OCR text of the message screenshot
    (
      any(ml.nlu_classifier(body.current_thread.text).intents,
          .name == "cred_theft" and .confidence in ("high", "medium")
      )
      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
             any(ml.nlu_classifier(.scan.ocr.raw).intents,
                 .name == "cred_theft" and .confidence in ("high", "medium")
             )
      )
    ),
    // signal: an HTML/text attachment renders with a Microsoft logo
    // (logo detection confidence medium or high)
    (
      any(attachments,
          .content_type in ("html", "text", "text/html")
          and any(ml.logo_detect(file.html_screenshot(.)).brands,
                  .name == "Microsoft" and .confidence in ("high", "medium")
          )
      )
    ),
    // signal: the sender display name contains a voicemail-style word pair
    // (e.g. "voice mail", "missed-call"), or "transcription" / "Caller ID"
    (
      regex.icontains(sender.display_name,
                      '(v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|(transcription|Caller.?ID)'
      )
    ),
    // attachment names are often HTML and voice mail related
    // signal: an HTML-like or PDF attachment whose file name contains a voicemail keyword,
    // a duration, or an emoji - or whose name is only an extension or missing
    (
      any(attachments,
          // this logic is reused below for eml attachments
          // ensure updates occur both places
          (
            .content_type in ("html", "text", "text/html")
            or .file_type in ("html", "unknown", "svg")
            or .file_type == "pdf"
          )
          and (
            regex.icontains(.file_name,
                            // voicemail-related keywords (case-insensitive substring match)
                            '(?:v[o0][il1]ce|aud[i1l][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
                            // contains a time
                            // 01min , 60secs
                            '0?[1-9]\s*min(?:(?:ute)?s)?',
                            '\d{1,2}\s*s(?:ec(?:ond)?s)?',
                            // (00:50s)
                            // 3:26 seconds
                            // NOTE(review): the unit group only spells "s", "seconds" or "secsonds";
                            // "sec"/"secs" still match through the bare "s" - confirm intent
                            '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                            // 03min25secs
                            '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                            // [0:39]
                            // (0:39)
                            '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
                            // contains an emoji
                            '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
            )
            // sometimes there is no name, it's just the extension which is also strange
            or .file_name in~ (".htm", ".html")
            // or sometimes it has no name....
            or .file_name is null
          )
      )
    ),
    // signal: an HTML-like attachment carries script content
    (
      any(attachments,
          (
            .file_type in ("html", "unknown", "svg")
            or .content_type in ("html", "text", "text/html")
          )
          and (
            // small file (< 1500 bytes) whose exploded content has at least one script
            (
              .size < 1500
              and any(file.explode(.), length(.scan.html.scripts) > 0)
            )
            // no size limit when the decoded text contains one of these script markers
            // sync with https://github.com/sublime-security/sublime-rules/blob/main/detection-rules/attachment_svg_embedded_js.yml
            or strings.ilike(file.parse_text(., encodings=["ascii", "utf8", "utf16-le"]).text,
                             "*onload*",
                             "*window.location.href*",
                             "*onerror*",
                             "*CDATA*",
                             "*<script*",
                             "*</script*",
                             "*atob*",
                             "*location.assign*",
                             "*decodeURIComponent*"
            )
          )
      )
    ),
    // signal: an HTML-like attachment is personalised for one of the To recipients
    (
      any(attachments,
          (
            .file_type in ("html", "unknown", "svg")
            or .content_type in ("html", "text", "text/html")
          )
          and any(recipients.to,
                  // the recipient domain's sld appears in the attachment file name
                  strings.contains(..file_name, .email.domain.sld)
                  // or the recipient's full address appears in the attachment's raw HTML
                  or strings.contains(file.parse_html(..).raw, .email.email)
          )
      )
    ),
    // eml attachments
    // signal: an attached message (message/rfc822 or .eml) itself carries an HTML-like or PDF
    // attachment whose name looks voicemail related
    (
      any(filter(attachments,
                 .content_type == "message/rfc822" or .file_extension in ('eml')
          ),
          // which contain attachments
          // this is the same logic as the outer attachment-name check above; keep both in sync
          any(file.parse_eml(.).attachments,
              (
                .content_type in ("html", "text", "text/html")
                or .file_type in ("html", "unknown", "svg")
                or .file_type == "pdf"
              )
              and (
                regex.icontains(.file_name,
                                // voicemail-related keywords (case-insensitive substring match)
                                '(?:v[o0][il1]ce|aud[il1][o0]|call|missed|caii|mail|message|recording|call|caii|transcr[il1]ption|v[nm]|audi[o0]|play|listen|unheard|msg)',
                                // contains a time
                                // 01min , 60secs
                                '0?[1-9]\s*min(?:(?:ute)?s)?',
                                '\d{1,2}\s*s(?:ec(?:ond)?s)?',
                                // (00:50s)
                                // 3:26 seconds
                                '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                                // 03min25secs
                                '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                                // [0:39]
                                // (0:39)
                                '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s',
                                // contains an emoji
                                '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
                )
                // sometimes there is no name, it's just the extension which is also strange
                or .file_name in~ (".htm", ".html")
                // or sometimes it has no name....
                or .file_name is null
              )
          )
      )
    ),
    // signal: an attached eml mirrors the outer message (same subject, same sender,
    // same To recipients) and itself has attachments or links outside the org
    (
      any(filter(attachments,
                 .file_extension in ('eml') or .content_type == "message/rfc822"
          ),
          file.parse_eml(.).sender.email.email == sender.email.email
          and file.parse_eml(.).subject.subject == subject.subject
          // To lists agree: equal length and every outer recipient is present in the inner list
          and length(file.parse_eml(.).recipients.to) == length(recipients.to)
          and all(recipients.to,
                  .email.email in map(file.parse_eml(..).recipients.to, .email.email)
          )
          and (
            // the inner message has attachments
            length(file.parse_eml(.).attachments) > 0
            // or at least one body link whose domain and root domain are both outside $org_domains
            or length(filter(file.parse_eml(.).body.links,
                             .href_url.domain.root_domain not in $org_domains
                             and .href_url.domain.domain not in $org_domains
                      )
            ) > 0
          )
      )
    ),
    // signal: a body link URL embeds a To recipient's address or local part
    (
      // at least one To recipient must have a non-empty address or a valid domain
      // NOTE(review): the inner any() still walks every recipient, including blank ones -
      // confirm whether it should iterate only the filtered recipients
      length(filter(recipients.to, .email.domain.valid or .email.email != "")) > 0
      and any(body.links,
              any(recipients.to,
                  strings.icontains(..href_url.url, .email.local_part)
                  or strings.icontains(..href_url.url, .email.email)
              )
      )
    ),
    // signal: a short body (< 700 characters) that spells common words with accented letters
    (
      regex.icontains(body.current_thread.text, 'Méssãge|Méssage|Recéived|Addréss')
      and length(body.current_thread.text) < 700
    ),
    // signal: none of the "real" body links point at the sender's domain.
    // A "real" link has display text, no display URL (which drops plain-text
    // email addresses) and a valid href domain. At least one such link must exist.
    (
      length(filter(body.links,
                    .href_url.domain.valid
                    and .display_url.url is null
                    and .display_text is not null
             )
      ) > 0
      and all(filter(body.links,
                     .href_url.domain.valid
                     and .display_url.url is null
                     and .display_text is not null
              ),
              .href_url.domain.root_domain != sender.email.domain.root_domain
              and .href_url.domain.root_domain not in $org_domains
              // link domains that never count as a mismatch
              and .href_url.domain.root_domain not in (
                "aka.ms",
                "unitelvoice.com",
                "googleapis.com",
                "dialmycalls.com",
                "ringcentral.biz",
                "google.com"
              )
      )
    ),
    // the body links contain vm related phrases
    (
      any(body.links,
          // NOTE(review): with regex.contains this only requires one uppercase letter somewhere
          // in the display text; if "no lowercase letters at all" was intended, confirm whether
          // regex.match should be used instead
          regex.contains(.display_text, '[^a-z]*[A-Z][^a-z]*')
          // voicemail word pairs, playback verbs, or a masked phone number
          and regex.icontains(.display_text,
                              '(?:v[nm]|v[o0][il1]ce|audi[o0]|ca[li][li]|missed|preview)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
          )
          // negate FP terms in link display texts
          and not strings.icontains(.display_text, 'voice call center')
      )
    ),
    // signal: a link whose URL path is exactly "/ctt" and whose display text
    // contains voicemail phrasing (same keyword families as the check above)
    (
      any(body.links,
          .href_url.path == "/ctt"
          and regex.icontains(.display_text,
                              '(v[nm]|v[o0][il1]ce|audi[o0]|call|missed|caii)(\s?|-)(mail|message|recording|call|caii)|transcription|open mp3|audi[o0] note|listen|playback|\(?(?:\*\*\*|[0-9]{3})?.(?:\*\*\*|[0-9]{3})[^a-z]{0,2}(?:\*{4}|\d+\*+)|play|voice note'
          )
          // negate FP terms in link display texts
          and not strings.icontains(.display_text, 'voice call center')
      )
    ),
    // signal: a body link (other than an unsubscribe link) points at a domain
    // whose WHOIS age is under 10 days
    (
      any(body.links,
          not strings.icontains(.href_url.path, "unsubscribe")
          and network.whois(.href_url.domain).days_old < 10
      )
    ),
    // signal: a To recipient's domain sld is echoed back in sender or subject elements
    (
      any(recipients.to,
          // recipient's SLD is in the subject
          strings.icontains(subject.subject, .email.domain.sld)
          // recipient's SLD is in the sender's display name
          or strings.icontains(sender.display_name, .email.domain.sld)
          // recipient's SLD is in the sender's local_part
          or strings.icontains(sender.email.local_part, .email.domain.sld)
      )
    ),
    // often times the subject or sender display name will contain time references
    // signal: a duration-like string appears in the display name, subject or body text,
    // or in the OCR text of the message screenshot
    (
      any([sender.display_name, subject.subject, body.current_thread.text],
          regex.icontains(.,
                          // 01min , 60secs
                          '0?[1-9]\s*min(?:(?:ute)?s)?\b',
                          '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
                          // (00:50s)
                          // 3:26 seconds
                          '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                          // 03min25secs
                          '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                          // [0:39]
                          // (0:39)
                          '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
          )
      )
      // reuse the same logic against OCR output of message_screenshot
      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
             regex.icontains(.scan.ocr.raw,
                             // 01min , 60secs
                             '0?[1-9]\s*min(?:(?:ute)?s)?\b',
                             '\d{1,2}\s*s(?:ec(?:ond)?s)?\b',
                             // (00:50s)
                             // 3:26 seconds
                             '[\(\[]?(?:\d{1,2}[\:\s-])\d{1,2}[\)\]]?\s*(?:s(?:(?:ecs?)onds)?)[\)\]]?',
                             // 03min25secs
                             '0?[1-9]\s*min(?:(?:ute)?s)?\d{1,2}\s*s(?:ec(?:ond)?s)?',
                             // [0:39]
                             // (0:39)
                             '[\(\[](?:\d{1,2}[\:\s-])\d{1,2}[\)\]]\s'
             )
      )
    ),
    // often times the subject or sender display name will contain dates
    // signal: a weekday, month name, date or clock time appears in the display name or subject,
    // or in the OCR text of the message screenshot
    (
      any([sender.display_name, subject.subject],
          // days of week
          any([
                'monday',
                'tuesday',
                'wednesday',
                'thursday',
                'friday',
                'saturday',
                'sunday'
              ],
              strings.icontains(.., .)
          )
          // months
          // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
          or any([
                   "January",
                   "February",
                   "March",
                   "April",
                   "June",
                   "July",
                   "August",
                   "September",
                   "October",
                   "November",
                   "December"
                 ],
                 strings.icontains(.., .)
          )
          // use a regex for May
          // NOTE(review): this match is case-insensitive, so the verb "may" also hits;
          // the OCR copy below uses a case-sensitive 'May' - confirm which is intended
          or regex.icontains(., '\bmay\b')
          // common date formats
          or regex.contains(.,
                            // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
                            '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
                            // MM/DD/YYYY or MM/DD/YY (US format)
                            '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
                            // DD/MM/YYYY or DD/MM/YY (European format)
                            '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
                            // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
                            '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
          )
          // common time formats
          or regex.contains(.,
                            // Example: 23:45, 08:30
                            '([01]\d|2[0-3]):([0-5]\d)',
                            // Example: 23:45:59, 08:30:12
                            '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
                            // Example: 08:30 AM, 12:45 pm
                            '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
                            // Example: 08:30:12 AM, 12:45:59 pm
                            '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
          )
      )
      // or use the OCR results from file.message_screenshot
      or any(filter(file.explode(file.message_screenshot()), .depth == 0),
             // days of week
             any([
                   'monday',
                   'tuesday',
                   'wednesday',
                   'thursday',
                   'friday',
                   'saturday',
                   'sunday'
                 ],
                 strings.icontains(..scan.ocr.raw, .)
             )
             // months
             // may is problematic for words like "Mayor", "Maybe", "MayFlower", etc
             or any([
                      "January",
                      "February",
                      "March",
                      "April",
                      "June",
                      "July",
                      "August",
                      "September",
                      "October",
                      "November",
                      "December"
                    ],
                    strings.icontains(..scan.ocr.raw, .)
             )
             // use a regex for May (case-sensitive here)
             or regex.contains(.scan.ocr.raw, '\bMay\b')
             // common date formats
             or regex.contains(.scan.ocr.raw,
                               // YYYY-MM-DD or YY-MM-DD (ISO 8601 format)
                               '\d{2}(\d{2})?-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])',
                               // MM/DD/YYYY or MM/DD/YY (US format)
                               '(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/\d{2}(\d{2})?',
                               // DD/MM/YYYY or DD/MM/YY (European format)
                               '(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/\d{2}(\d{2})?',
                               // Month DD, YYYY or Month DD, YY (e.g., March 15, 2024 or March 15, 24)
                               '(January|February|March|April|May|June|July|August|September|October|November|December) (0[1-9]|[12]\d|3[01]), \d{2}(\d{2})?'
             )
             // common time formats
             or regex.contains(.scan.ocr.raw,
                               // Example: 23:45, 08:30
                               '([01]\d|2[0-3]):([0-5]\d)',
                               // Example: 23:45:59, 08:30:12
                               '([01]\d|2[0-3]):([0-5]\d):([0-5]\d)',
                               // Example: 08:30 AM, 12:45 pm
                               '(0[1-9]|1[0-2]):([0-5]\d)\s?([AaPp][Mm])',
                               // Example: 08:30:12 AM, 12:45:59 pm
                               '(0[1-9]|1[0-2]):([0-5]\d):([0-5]\d) ?([AaPp][Mm])'
             )
      )
    ),
    // there are often emoji in the sender display name
    // signal: the display name or subject contains an emoji that is not part of
    // a leading run of [bracketed] tags
    (
      any([sender.display_name, subject.subject],
          // contains an emoji
          regex.contains(.,
                         '[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}]'
          )
          // negate where the emoji occur in tags
          // (the string starts with one or more [..] tags and one of them holds an emoji)
          and not regex.contains(.,
                                 '^(?:\[[^\]]*\]\s*)*\[[^\]]*[\x{1F300}-\x{1F5FF}\x{1F600}-\x{1F64F}\x{1F680}-\x{1F6FF}\x{1F700}-\x{1F77F}\x{1F780}-\x{1F7FF}\x{1F900}-\x{1F9FF}\x{2600}-\x{26FF}\x{2700}-\x{27BF}\x{2300}-\x{23FF}][^\]]*\]'
          )
      )
    ),
    // signal: with only 1-2 attachments, an image, PDF or macro-capable document
    // contains a QR code that is a URL or that references a To recipient
    (
      1 <= length(attachments) <= 2
      and any(attachments,
              (
                .file_type == "pdf"
                or .file_type in $file_types_images
                or .file_extension in $file_extensions_macros
              )
              and any(file.explode(.),
                      strings.icontains(.scan.qr.data, 'http')
                      or .scan.qr.type == "url"
                      or any(recipients.to,
                             strings.icontains(..scan.qr.data, .email.email)
                             or strings.icontains(..scan.qr.data, .email.local_part)
                      )
              )
      )
    )
  )
  
  // negate known voicemail/telephony sender domains
  // (only honoured when the sender's domain is valid)
  and not (
    sender.email.domain.root_domain in (
      "airtel.com",
      "grasshopper.com",
      "justcall.io",
      "magicjack.com",
      "magicjackforbusiness.com",
      "ooma.com",
      "ringcentral.biz",
      "t-mobile.com",
      "ui.com",
      "unitelvoice.com",
      "verizonwireless.com",
      "voipinterface.net"
    )
    and sender.email.domain.valid
  )
  // negate messages that carry a genuine audio attachment: the declared content type
  // is audio/* and the detected file type confirms it (attachments have been seen
  // claiming audio/* while actually being something else)
  and not any(attachments,
              .file_type in ("wav", "mp3")
              and strings.starts_with(.content_type, "audio")
  )
  // negate replies and out-of-office auto-replies that carry threading headers
  and not (
    (
      length(headers.references) > 0
      or any(headers.hops, any(.fields, strings.ilike(.name, "In-Reply-To")))
    )
    and (
      // out of office auto-reply
      // the NLU model will handle these better natively soon
      strings.istarts_with(subject.subject, "Automatic reply:")
      or strings.istarts_with(subject.subject, "RE:")
    )
  )
  // bounce-back negations: skip messages with a delivery-status or calendar part,
  // either attached directly or inside an attached eml
  and not (
    any(attachments,
        .content_type in ("text/calendar", "message/delivery-status")
    )
    or any(attachments,
           any(file.parse_eml(.).attachments,
               .content_type == "message/delivery-status"
           )
    )
  )
  // negate high-confidence newsletter / cold outreach / event topics,
  // unless the body contains the phrase "this voicemail was shared by"
  and not (
    not strings.icontains(body.current_thread.text, "this voicemail was shared by")
    and any(ml.nlu_classifier(body.current_thread.text).topics,
            .confidence == "high"
            and .name in (
              "Events and Webinars",
              "B2B Cold Outreach",
              "Newsletters and Digests"
            )
    )
  )
  // negate Proofpoint bounce-backs: pphosted.com in the message id and first hop,
  // the standard "Mail Delivery Subsystem" display name, and an attached rfc822 message
  and not (
    any(attachments, .content_type == "message/rfc822")
    and any(headers.hops,
            .index == 0 and strings.contains(.received.server.raw, "pphosted.com")
    )
    and strings.ends_with(headers.message_id, "pphosted.com>")
    and sender.display_name == "Mail Delivery Subsystem"
  )
  // negate CheckPoint encrypted messages: exactly one attachment, a message id
  // starting with "<encrypted", and checkpointcloudsec.com in both links and header domains
  and not (
    strings.istarts_with(headers.message_id, "<encrypted")
    and any(headers.domains, .root_domain == "checkpointcloudsec.com")
    and any(body.links, .href_url.domain.root_domain == "checkpointcloudsec.com")
    // CheckPoint banner
    and length(attachments) == 1
  )
  // sender trust gate: a high-trust sender domain only counts when it fails DMARC
  // (i.e. it is likely impersonated); any other sender domain passes this gate
  and (
    (
      sender.email.domain.root_domain in $high_trust_sender_root_domains
      // coalesce so a missing DMARC verdict is treated as "did not pass",
      // consistent with the org-domain check further down
      and not coalesce(headers.auth_summary.dmarc.pass, false)
      // service abuse
      and not sender.email.email in ("noreply-application-integration@google.com")
    )
    or sender.email.domain.root_domain not in $high_trust_sender_root_domains
  
    // sender profile
    or (
      // sender outside the org that is neither common nor solicited
      (
        not sender.email.domain.root_domain in $org_domains
        and (profile.by_sender_email().prevalence not in ("common"))
        and not profile.by_sender_email().solicited
      )
      // sender with malicious/spam history and no benign history
      or (
        profile.by_sender_email().any_messages_malicious_or_spam
        and not profile.by_sender_email().any_messages_benign
      )
      // match if the sender is in org domains but failed auth
      or (
        sender.email.domain.domain in $org_domains
        and not coalesce(headers.auth_summary.dmarc.pass, false)
      )
      // match if the sender address is blank or null
      or (regex.match(sender.email.email, "") or sender.email.email is null)
    )
  )
attack_types:
  - "Credential Phishing"
tactics_and_techniques:
  - "Social engineering"
detection_methods:
  - "Content analysis"
  - "Natural Language Understanding"
  - "Sender analysis"
  - "URL analysis"
id: "74ba7787-e543-5ce8-b6eb-e1ecdb8f1d67"