import SwiftSoup

/**
 Handles both inline HTML and the external HTML command.

 The HTML itself is returned unchanged. It is additionally parsed with SwiftSoup
 so that the images, links and `source` elements it contains can be reported
 to the page generation results.
 */
struct PageHtmlProcessor: CommandProcessor {

    let commandType: ShorthandMarkdownKey = .includedHtml

    let results: PageGenerationResults

    let content: Content

    /**
     Create a processor.

     - Parameter content: The content used to look up files by their id
     - Parameter results: The sink for warnings, links and required files
     - Parameter language: Accepted as part of the initializer signature, but not used by this processor
     */
    init(content: Content, results: PageGenerationResults, language: ContentLanguage) {
        self.content = content
        self.results = results
    }

    /**
     Handle the HTML command

     Format: `![html](<fileId>)`

     - Parameter arguments: The command arguments; exactly one is expected (the id of the file to include)
     - Parameter markdown: The original markdown of the command, reported if the arguments are invalid
     - Returns: The text content of the referenced file, or an empty string if the command is invalid
     or the file is missing
     */
    func process(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 1, let fileId = arguments.first else {
            results.invalid(command: .includedHtml, markdown)
            return ""
        }
        guard let file = content.file(fileId) else {
            results.missing(file: fileId, source: "External HTML command")
            return ""
        }
        // Deliberately not named `content`, to avoid shadowing the stored property
        let html = file.textContent()
        checkResources(in: html)
        return html
    }

    /**
     Handle inline HTML

     - Parameter html: The inline HTML to check
     - Parameter markdown: The original markdown (unused)
     - Returns: The unmodified `html`
     */
    func process(_ html: String, markdown: Substring) -> String {
        checkResources(in: html)
        return html
    }

    /**
     Parse the HTML and report all images, links and sources found in it.

     A parsing failure is reported as a warning, and no further checks are performed.
     */
    private func checkResources(in html: String) {
        let document: Document
        do {
            document = try SwiftSoup.parse(html)
        } catch {
            results.warning("Failed to parse inline HTML: \(error)")
            return
        }
        checkImages(in: document)
        checkLinks(in: document)
        checkSourceSets(in: document)
    }

    /**
     Collect the values of an attribute from a list of elements.

     - Parameter attribute: The name of the attribute to read
     - Parameter elements: The elements to inspect
     - Returns: The attribute values (not trimmed), without those that are empty after trimming
     - Throws: Any error thrown by SwiftSoup while reading the attribute
     */
    private func attributeValues(_ attribute: String, of elements: [Element]) throws -> [String] {
        return try elements
            .map { try $0.attr(attribute) }
            .filter { !$0.trimmed.isEmpty }
    }

    /**
     Report a warning for each `img` element with a non-empty `src` attribute.
     */
    private func checkImages(in document: Document) {
        let srcAttributes: [String]
        do {
            let imgElements = try document.select("img").array()
            srcAttributes = try attributeValues("src", of: imgElements)
        } catch {
            results.warning("Failed to check 'src' attributes of  elements in inline HTML: \(error)")
            return
        }

        for src in srcAttributes {
            results.warning("Found image in html: \(src)")
        }
    }

    /**
     Check the `href` attribute of each `a` element.

     Links starting with `http://` or `https://` are registered as external links,
     all other non-empty links produce a warning.
     */
    private func checkLinks(in document: Document) {
        let hrefs: [String]
        do {
            let linkElements = try document.select("a").array()
            hrefs = try attributeValues("href", of: linkElements).map { $0.trimmed }
        } catch {
            results.warning("Failed to check 'href' attributes of  elements in inline HTML: \(error)")
            return
        }

        for url in hrefs {
            // URI schemes are case-insensitive, so `HTTPS://` must also count as external.
            // The link is still reported with its original spelling.
            let lowercasedUrl = url.lowercased()
            if lowercasedUrl.hasPrefix("http://") || lowercasedUrl.hasPrefix("https://") {
                results.externalLink(to: url)
            } else {
                results.warning("Relative link in HTML: \(url)")
            }
        }
    }

    /**
     Check the `srcset` and `src` attributes of all `source` elements.
     */
    private func checkSourceSets(in document: Document) {
        let sources: [Element]
        do {
            sources = try document.select("source").array()
        } catch {
            results.warning("Failed to find  elements in inline HTML: \(error)")
            return
        }

        checkSourceSetAttributes(sources: sources)
        checkSourceAttributes(sources: sources)
    }

    /**
     Report a warning for each non-empty `srcset` attribute of the given elements.
     */
    private func checkSourceSetAttributes(sources: [Element]) {
        let srcSets: [String]
        do {
            srcSets = try attributeValues("srcset", of: sources)
        } catch {
            results.warning("Failed to check 'srcset' attributes of  elements in inline HTML: \(error)")
            return
        }

        for src in srcSets {
            results.warning("Found source set in html: \(src)")
        }
    }

    /**
     Check each non-empty `src` attribute of the given elements.

     If the value is a valid file id for which a file exists, the file is marked as required.
     Otherwise a warning is reported.
     */
    private func checkSourceAttributes(sources: [Element]) {
        let srcAttributes: [String]
        do {
            srcAttributes = try attributeValues("src", of: sources)
        } catch {
            results.warning("Failed to check 'src' attributes of  elements in inline HTML: \(error)")
            return
        }

        for src in srcAttributes {
            // Invalid ids and unknown files are reported identically
            guard content.isValidIdForFile(src), let file = content.file(src) else {
                results.warning("Found source in html: \(src)")
                continue
            }
            #warning("Either find files by their full path, or replace file id with full path")
            results.require(file: file)
        }
    }
}