2025-01-02 11:56:51 +01:00

154 lines
4.4 KiB
Swift

import SwiftSoup
/**
 Handles both inline HTML and the external HTML command.

 The HTML is returned unchanged. It is only parsed (using SwiftSoup) to report
 the resources it references — `<img>`, `<a>` and `<source>` elements — to the
 page generation results.
 */
struct PageHtmlProcessor: CommandProcessor {

    let commandType: ShorthandMarkdownKey = .includedHtml

    /// Receives all warnings, external links and required files found in the HTML.
    let results: PageGenerationResults

    /// Used to look up files by their id.
    let content: Content

    init(content: Content, results: PageGenerationResults) {
        self.content = content
        self.results = results
    }

    /**
     Handle the HTML command

     Format: `![html](<fileId>)`

     - Parameter arguments: The command arguments; exactly one (the file id) is expected.
     - Parameter markdown: The original markdown of the command, reported when the command is invalid.
     - Returns: The text content of the referenced file, or an empty string
       if the argument count is wrong or the file cannot be found.
     */
    func process(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 1 else {
            results.invalid(command: .includedHtml, markdown)
            return ""
        }
        let fileId = arguments[0]
        guard let file = content.file(fileId) else {
            results.missing(file: fileId, source: "External HTML command")
            return ""
        }
        // Named `html` (not `content`) to avoid shadowing the stored property used above.
        let html = file.textContent()
        checkResources(in: html)
        return html
    }

    /**
     Handle inline HTML

     - Parameter html: The inline HTML to check.
     - Parameter markdown: The original markdown; not used by this method.
     - Returns: The unmodified `html`.
     */
    func process(_ html: String, markdown: Substring) -> String {
        checkResources(in: html)
        return html
    }

    /**
     Parse the HTML and report the images, links and sources it references.

     A parsing failure is reported as a warning and ends the check.
     */
    private func checkResources(in html: String) {
        let document: Document
        do {
            document = try SwiftSoup.parse(html)
        } catch {
            results.warning("Failed to parse inline HTML: \(error)")
            return
        }
        checkImages(in: document)
        checkLinks(in: document)
        checkSourceSets(in: document)
    }

    /**
     Collect the values of one attribute from a list of elements,
     dropping values that are empty after trimming.

     The returned values themselves are not trimmed.
     */
    private func nonBlankAttributes(_ name: String, of elements: [Element]) throws -> [String] {
        try elements
            .compactMap { try $0.attr(name) }
            .filter { !$0.trimmed.isEmpty }
    }

    /**
     Report a warning for every `<img>` element with a non-blank `src` attribute.
     */
    private func checkImages(in document: Document) {
        let srcAttributes: [String]
        do {
            let imgElements = try document.select("img")
            srcAttributes = try nonBlankAttributes("src", of: imgElements.array())
        } catch {
            results.warning("Failed to check 'src' attributes of <img> elements in inline HTML: \(error)")
            return
        }
        for src in srcAttributes {
            results.warning("Found image in html: \(src)")
        }
    }

    /**
     Check the `href` attributes of all `<a>` elements.

     Links starting with `http://` or `https://` are reported as external links,
     all other non-empty links produce a warning.
     */
    private func checkLinks(in document: Document) {
        let hrefs: [String]
        do {
            let linkElements = try document.select("a")
            // Unlike the other checks, the reported link is the trimmed value.
            hrefs = try linkElements.array()
                .compactMap { try $0.attr("href").trimmed }
                .filter { !$0.isEmpty }
        } catch {
            results.warning("Failed to check 'href' attributes of <a> elements in inline HTML: \(error)")
            return
        }
        for url in hrefs {
            if url.hasPrefix("http://") || url.hasPrefix("https://") {
                results.externalLink(to: url)
            } else {
                results.warning("Relative link in HTML: \(url)")
            }
        }
    }

    /**
     Find all `<source>` elements and check their `srcset` and `src` attributes.
     */
    private func checkSourceSets(in document: Document) {
        let sources: [Element]
        do {
            sources = try document.select("source").array()
        } catch {
            results.warning("Failed to find <source> elements in inline HTML: \(error)")
            return
        }
        // Without these calls the collected elements are discarded
        // and neither attribute check is ever run.
        checkSourceSetAttributes(sources: sources)
        checkSourceAttributes(sources: sources)
    }

    /**
     Report a warning for every non-blank `srcset` attribute of the given elements.
     */
    private func checkSourceSetAttributes(sources: [Element]) {
        let srcSets: [String]
        do {
            srcSets = try nonBlankAttributes("srcset", of: sources)
        } catch {
            results.warning("Failed to check 'srcset' attributes of <source> elements in inline HTML: \(error)")
            return
        }
        for src in srcSets {
            results.warning("Found source set in html: \(src)")
        }
    }

    /**
     Check the non-blank `src` attributes of the given elements.

     If the value is a valid file id of an existing file, the file is marked as required.
     Otherwise a warning is reported.
     */
    private func checkSourceAttributes(sources: [Element]) {
        let srcAttributes: [String]
        do {
            srcAttributes = try nonBlankAttributes("src", of: sources)
        } catch {
            results.warning("Failed to check 'src' attributes of <source> elements in inline HTML: \(error)")
            return
        }
        for src in srcAttributes {
            // An invalid id and an unknown file are reported with the same warning.
            guard content.isValidIdForFile(src), let file = content.file(src) else {
                results.warning("Found source in html: \(src)")
                continue
            }
            #warning("Either find files by their full path, or replace file id with full path")
            results.require(file: file)
        }
    }
}