EXPLORE
← Back to Explore
sublime | medium | Rule

Attachment: Office file with credential phishing URLs

Detects Office documents containing embedded URLs that redirect to credential phishing pages. The rule filters out standard XML namespace and schema URLs commonly found in legitimate Office documents, then analyzes remaining URLs for malicious content using machine learning link analysis.

MITRE ATT&CK

defense-evasion, initial-access

Detection Query

// Matches inbound messages that carry an Office attachment with only a handful of
// non-schema URLs, at least one of which link analysis classifies as credential phishing.
// The URL exclusion filter is written out twice below (once to count URLs, once to
// analyze them); the two copies are identical and should be edited together.
type.inbound
// Step 1: keep only attachments that contain 1-3 non-schema URLs
and any(filter(attachments,
               // Only check extensions listed in $file_extensions_macros
               // (Office documents that can contain macros/embedded content)
               .file_extension in $file_extensions_macros

               // Total the URLs left after filtering out common XML namespace/schema URLs;
               // the lower bound (0 <) requires at least one such URL to be present
               and 0 < sum(map(map(file.explode(.),
                                   // Inner map: for each exploded component, drop the standard
                                   // XML namespace URLs that appear in all Office docs
                                   filter(.scan.url.urls,
                                          // Exclude OpenXML format schemas
                                          .domain.domain not in (
                                            'schemas.openxmlformats.org',
                                            'schemas.microsoft.com',
                                            'www.w3.org'
                                          )
                                          // Additional exclusions: bare Microsoft domain and WPS Office
                                          and not .domain.domain in (
                                            'microsoft.com',
                                            'wps.cn' // WPS is a China-based alternative to MS Office; its domain is used in namespaces of documents created by that product
                                          )
                                          // Exclude Dublin Core persistent URLs (metadata schemas)
                                          and not (
                                            .domain.domain == 'purl.org'
                                            and strings.starts_with(.path,
                                                                    '/dc/'
                                            )
                                          )
                                          // Exclude Dublin Core XML schemas
                                          and not (
                                            .domain.domain == "dublincore.org"
                                            and strings.starts_with(.path,
                                                                    '/schemas/xmls/'
                                            )
                                          )
                                   )
                               ),
                               // Outer map: turn each component's filtered URL list into its length
                               length(.)
                           )
               ) <= 3 // Upper bound: only process attachments with 3 or fewer non-schema URLs
        ),
        // Step 2: for the attachments that passed the count check, look for a malicious URL
        // in any exploded component
        any(file.explode(.),
            any(
                // Apply the same URL filtering as in step 1 to remove XML namespace noise
                filter(.scan.url.urls,
                       // OpenXML format schemas
                       .domain.domain not in (
                         'schemas.openxmlformats.org',
                         'schemas.microsoft.com',
                         'www.w3.org'
                       )
                       // Bare Microsoft domain and WPS Office
                       and not .domain.domain in (
                         'microsoft.com',
                         'wps.cn' // WPS is a China-based alternative to MS Office; its domain is used in namespaces of documents created by that product
                       )
                       // Dublin Core persistent URLs (metadata schemas)
                       and not (
                         .domain.domain == 'purl.org'
                         and strings.starts_with(.path, '/dc/')
                       )
                       // Dublin Core XML schemas
                       and not (
                         .domain.domain == "dublincore.org"
                         and strings.starts_with(.path, '/schemas/xmls/')
                       )
                ),
                // Run link analysis on each remaining URL; require a credential phishing disposition
                ml.link_analysis(.).credphish.disposition == "phishing"
                // Confidence is only returned when a brand is identified. When it is absent,
                // coalesce falls back to true, so:
                //   - if a brand (and therefore a confidence) is present, confidence must be "high"
                //   - if no confidence is returned, the URL can still match
                and coalesce(ml.link_analysis(.).credphish.confidence == "high",
                             true
                )
                // Do not match GoDaddy parked-domain pages: the brand is GoDaddy and the
                // final DOM's inner text contains both parking-page phrases below.
                // The explicit null check guards the brand comparison when no brand is returned.
                and not (
                  ml.link_analysis(.).credphish.brand.name is not null
                  and ml.link_analysis(.).credphish.brand.name == "GoDaddy"
                  and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                        'is parked free, courtesy of GoDaddy.com.'
                  )
                  and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                        'Get This Domain'
                  )
                )
            )
        )
)

Data Sources

Email Messages, Email Headers, Email Attachments

Platforms

email
Raw Content
# Rule definition for "Attachment: Office file with credential phishing URLs".
# Top-level metadata: display name, human-readable description, object type, and severity.
name: "Attachment: Office file with credential phishing URLs"
description: "Detects Office documents containing embedded URLs that redirect to credential phishing pages. The rule filters out standard XML namespace and schema URLs commonly found in legitimate Office documents, then analyzes remaining URLs for malicious content using machine learning link analysis."
type: "rule"
severity: "medium"
# Detection query, stored as a literal block scalar. Everything indented under `source: |`
# (including the `//` comment lines) is part of the query text, not YAML commentary.
# The query has two stages: (1) keep attachments whose extension is in $file_extensions_macros
# and that contain 1-3 URLs after schema/namespace URLs are excluded; (2) match if any of those
# remaining URLs gets a "phishing" credphish disposition from link analysis, excluding
# GoDaddy parked-domain pages. The URL exclusion filter is duplicated in both stages.
source: |
  type.inbound
  // Filter to Office documents that contain 1-3 non-schema URLs
  and any(filter(attachments,
                 // Only check Office documents that can contain macros/embedded content
                 .file_extension in $file_extensions_macros
  
                 // Count URLs after filtering out common XML namespace/schema URLs
                 and 0 < sum(map(map(file.explode(.),
                                     // Filter out standard XML namespace URLs that appear in all Office docs
                                     filter(.scan.url.urls,
                                            // Exclude OpenXML format schemas
                                            .domain.domain not in (
                                              'schemas.openxmlformats.org',
                                              'schemas.microsoft.com',
                                              'www.w3.org'
                                            )
                                            // Additional Microsoft domain exclusion
                                            and not .domain.domain in (
                                              'microsoft.com',
                                              'wps.cn' // WPS is a china based alt to MS Office and used in namespaces of the documents created by that product
                                            )
                                            // Exclude Dublin Core persistent URLs (metadata schemas)
                                            and not (
                                              .domain.domain == 'purl.org'
                                              and strings.starts_with(.path,
                                                                      '/dc/'
                                              )
                                            )
                                            // Exclude Dublin Core XML schemas
                                            and not (
                                              .domain.domain == "dublincore.org"
                                              and strings.starts_with(.path,
                                                                      '/schemas/xmls/'
                                              )
                                            )
                                     )
                                 ),
                                 // Count URLs in each exploded file component
                                 length(.)
                             )
                 ) <= 3 // Only process attachments with 3 or fewer non-schema URLs
          ),
          // For the filtered Office documents, check for malicious URLs
          any(file.explode(.),
              any(
                  // Apply the same URL filtering to remove XML namespace noise
                  filter(.scan.url.urls,
                         .domain.domain not in (
                           'schemas.openxmlformats.org',
                           'schemas.microsoft.com',
                           'www.w3.org'
                         )
                         and not .domain.domain in (
                           'microsoft.com',
                           'wps.cn' // WPS is a china based alt to MS Office and used in namespaces of the documents created by that product
                         )
                         and not (
                           .domain.domain == 'purl.org'
                           and strings.starts_with(.path, '/dc/')
                         )
                         and not (
                           .domain.domain == "dublincore.org"
                           and strings.starts_with(.path, '/schemas/xmls/')
                         )
                  ),
                  // Run link analysis on the filtered URLs to detect phishing
                  ml.link_analysis(.).credphish.disposition == "phishing"
                  // confidence is only returned when brands, if it's not there, consider this true
                  // this ensures if there is a brand, the confidence is high
                  // and allows matching when there is no confidence
                  and coalesce(ml.link_analysis(.).credphish.confidence == "high",
                               true
                  )
                  and not (
                    ml.link_analysis(.).credphish.brand.name is not null
                    and ml.link_analysis(.).credphish.brand.name == "GoDaddy"
                    and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                          'is parked free, courtesy of GoDaddy.com.'
                    )
                    and strings.icontains(ml.link_analysis(.).final_dom.inner_text,
                                          'Get This Domain'
                    )
                  )
              )
          )
  )
# Classification tags: the attack type the rule targets, the attacker tactics involved,
# and the detection methods the query relies on.
attack_types:
  - "Credential Phishing"
tactics_and_techniques:
  - "Evasion"
  - "Social engineering"
detection_methods:
  - "File analysis"
  - "URL analysis"
  - "Archive analysis"
  - "Content analysis"
# Unique identifier (UUID) for this rule.
id: "b2cae98d-3a93-5f49-bc81-1b20296cc332"