EXPLORE
← Back to Explore
sublime · high · Rule

Attachment: Web files with suspicious comments

Detects HTML or SVG files under 100KB that contain duplicate or padding text in the form of literary quotes or common sayings within code comments.

Detection Query

// Flags inbound messages carrying a small web-file attachment whose text is padded with
// filler sentences (repeated `//` comments, `<!-- -->` comments, or hidden elements).
type.inbound
// true when at least one attachment passes the file filter AND one of the three padding checks
and any(attachments,
        (
          // file filter: treated as a web file if ANY of detected type, extension, or content type matches
          // (note that "text/plain" is accepted here as well as "text/html")
          (
            .file_type in ("html", "svg")
            or .file_extension in ("html", "xhtml", "mhtml", "svg")
            or .content_type in ("text/html", "text/plain")
          )
          // size cap; the rule description words this as "under 100KB" (presumably bytes -- confirm)
          and .size < 100000
        )
        and (
          (
            // Check 1: `//` line comments that pad the file with quotes from literature
            // examples: "// Echoes of the past linger in silence.", "// The wind whispered secrets unknown.", "// Shadows tell stories in the dark."

            // The pattern is `// `, one capital letter, then only lowercase letters and spaces, then a period
            // (the space is listed twice inside the character class, which is redundant but harmless).
            // Numerator: count of ALL matches of the pattern in the attachment's parsed text
            regex.count(file.parse_text(.).text, '// [A-Z][ a-z ]+\.') / 
            // Denominator: count of UNIQUE matched strings (de-duplicated on .full_match)
            // NOTE(review): with zero matches the denominator is 0 -- confirm how the query language evaluates that case
            // NOTE(review): confirm whether `/` truncates to an integer before the comparison below
            length(distinct(regex.extract(file.parse_text(.).text,
                                          '// [A-Z][ a-z ]+\.'
                            ),
                            .full_match
                   )
            ) 
            // a ratio of 2 or more means each unique sentence appears twice on average,
            // i.e. at least 50% of the matching comments are duplicates
            >= 2
          )
          or (
            // Check 2: `<!-- ... -->` comments that pad the file with sayings
            // examples: "<!-- <span> No gain without pain. </span> -->", "<!-- <p> Beauty is only skin deep. </p> -->", "<!-- <span> Actions speak louder than words. </span> -->"
            // the opening/closing tag inside the comment is optional; the sentence itself must be
            // one capital letter followed by lowercase letters and spaces, ending in a period
            regex.count(file.parse_text(.).text,
                        '<!-- +(<[a-z]+>)? [A-Z][ a-z ]+\. (</[a-z]+>)? +-->'
            )
          // the comparison applies to the parenthesised count above: more than 2 matches (3 or more) are required
          ) > 2
          or (
            // Check 3: filler sentences placed inside hidden HTML elements (element text, not code comments)
            // example: "<h1 style="display:none;"> Self-confidence inspires others to believe in you. </h1>"
            // the style attribute must be exactly `style="display:none;"`; the sentence body uses `.*`,
            // so unlike checks 1 and 2 it also accepts hyphens, punctuation, and capitals
            // NOTE(review): `.*` is greedy; whether it can run across line breaks depends on the regex engine -- confirm
            regex.count(file.parse_text(.).text,
                        '<[a-z0-9]+ style="display:none;">(<[a-z]+>)? [A-Z].*\. </[a-z0-9]+>'
            )
          // more than 2 matches (3 or more) are required
          ) > 2
        )
)

Data Sources

Email Messages, Email Headers, Email Attachments

Platforms

email

Tags

Attack surface reduction
Raw Content
# Detection rule definition: identifying metadata first, then the detection query, then classification labels.
name: "Attachment: Web files with suspicious comments"
description: "Detects HTML or SVG files under 100KB that contain duplicate or padding text in the form of literary quotes or common sayings within code comments."
type: "rule"
severity: "high"
# Detection query, stored as a literal block scalar: every line below (including the `//` query
# comments and trailing spaces) is part of the string value, so it must not be re-indented or trimmed.
# The query matches inbound messages with an attachment that passes a web-file/size filter and
# at least one of three regex-based padding checks.
source: |
  type.inbound
  and any(attachments,
          (
            (
              .file_type in ("html", "svg")
              or .file_extension in ("html", "xhtml", "mhtml", "svg")
              or .content_type in ("text/html", "text/plain")
            )
            and .size < 100000
          )
          and (
            (
              // targeting comments that pad the file with quotes from literature
              // examples: "// Echoes of the past linger in silence.", "// The wind whispered secrets unknown.", "// Shadows tell stories in the dark."
  
              // count all HTML code comments that match our pattern
              regex.count(file.parse_text(.).text, '// [A-Z][ a-z ]+\.') / 
              // divide by the count of all UNIQUE HTML code comments that match our pattern
              length(distinct(regex.extract(file.parse_text(.).text,
                                            '// [A-Z][ a-z ]+\.'
                              ),
                              .full_match
                     )
              ) 
              // at least 50% of the comments are duplicates
              >= 2
            )
            or (
              // targeting comments that pad the file with sayings
              // examples: "<!-- <span> No gain without pain. </span> -->", "<!-- <p> Beauty is only skin deep. </p> -->", "<!-- <span> Actions speak louder than words. </span> -->"
              regex.count(file.parse_text(.).text,
                          '<!-- +(<[a-z]+>)? [A-Z][ a-z ]+\. (</[a-z]+>)? +-->'
              )
            ) > 2
            or (
              // targeting comments inside hidden HTML elements
              // example: "<h1 style="display:none;"> Self-confidence inspires others to believe in you. </h1>"
              regex.count(file.parse_text(.).text,
                          '<[a-z0-9]+ style="display:none;">(<[a-z]+>)? [A-Z].*\. </[a-z0-9]+>'
              )
            ) > 2
          )
  )
tags:
 - "Attack surface reduction"
# Classification labels: what the rule is meant to catch, the techniques involved, and how it detects them.
attack_types:
  - "Credential Phishing"
  - "Malware/Ransomware"
tactics_and_techniques:
  - "HTML smuggling"
  - "Evasion"
detection_methods:
  - "File analysis"
  - "HTML analysis"
  - "Content analysis"
# Identifier for this rule (UUID-formatted string).
id: "93061d17-730a-5b33-955d-8f8f6cc5cca9"