ChWebsiteApp/CHDataManagement/Generator/PageContentGenerator.swift
2024-12-26 15:01:01 +01:00

534 lines
18 KiB
Swift

import Foundation
import Ink
import Splash
import SwiftSoup
/**
 Converts the markdown content of pages to HTML for a single language.

 Files, links, required headers/footers and problems encountered during the conversion
 are reported to the `results` object passed to the initializer.
 */
final class PageContentParser {
/// The script registered as a page footer when a code block other than Swift is encountered
private static let codeHighlightFooter = "<script>hljs.highlightAll();</script>"
/// The Splash highlighter used for Swift code blocks, producing HTML output
private let swift = SyntaxHighlighter(format: HTMLOutputFormat())
/// The language used to select localized texts and urls
private let language: ContentLanguage
/// The content used to look up images, files, pages, tags and settings
private let content: Content
/// The object receiving warnings, used/missing files and other requirements of the generated page
private let results: PageGenerationResults
/// Processes the `.buttons` command
private let buttonHandler: ButtonCommandProcessor
/// Processes the `.labels` command
private let labelHandler: LabelsCommandProcessor
/// Processes the `.audioPlayer` command
private let audioPlayer: AudioPlayerCommandProcessor
/// Processes the links found in the markdown
private let inlineLink: InlineLinkProcessor
/// Processes the `.icons` command
private let icons: IconCommandProcessor
/// The size (used for both width and height) of the large version of an image, taken from the page settings
var largeImageWidth: Int {
content.settings.pages.largeImageWidth
}
/// The size (used for both width and height) of image thumbnails, which is the content width from the page settings
var thumbnailWidth: Int {
content.settings.pages.contentWidth
}
/**
 Create a parser to generate page content in a specific language.

 - Parameter content: The content used to look up the items referenced in the markdown
 - Parameter language: The language used for localized texts and urls
 - Parameter results: The object receiving everything noted during the generation
 */
init(content: Content, language: ContentLanguage, results: PageGenerationResults) {
    self.language = language
    self.content = content
    self.results = results

    // All command processors share the same content and results
    buttonHandler = ButtonCommandProcessor(content: content, results: results)
    labelHandler = LabelsCommandProcessor(content: content, results: results)
    audioPlayer = AudioPlayerCommandProcessor(content: content, results: results)
    icons = IconCommandProcessor(content: content, results: results)
    inlineLink = InlineLinkProcessor(content: content, results: results, language: language)
}
/**
 Convert the markdown of a page to HTML.

 Images, code blocks, links, raw HTML and headings are passed through the handlers of this parser.

 - Parameter content: The markdown to convert
 - Returns: The generated HTML
 */
func generatePage(from content: String) -> String {
    let modifiers: [Modifier] = [
        .init(target: .images, closure: processMarkdownImage),
        .init(target: .codeBlocks, closure: handleCode),
        .init(target: .links, closure: inlineLink.handleLink),
        .init(target: .html, closure: handleHTML),
        .init(target: .headings, closure: handleHeadlines),
    ]
    return MarkdownParser(modifiers: modifiers).html(from: content)
}
/**
 Create the HTML for a fenced code block.

 Blocks starting with "```swift" are highlighted using Splash.
 For all other blocks the HTML of the markdown parser is kept, and the code highlighting header
 and footer (which calls `hljs.highlightAll()`) are required for the page.

 - Parameter html: The HTML generated by the markdown parser
 - Parameter markdown: The markdown of the code block, including the fences
 - Returns: The HTML to insert for the code block
 */
private func handleCode(html: String, markdown: Substring) -> String {
    guard markdown.starts(with: "```swift") else {
        results.require(header: .codeHightlighting)
        results.require(footer: PageContentParser.codeHighlightFooter)
        return html // Just use normal code highlighting
    }
    // Highlight swift code using Splash
    let code = markdown.between("```swift", and: "```").trimmed
    // Close the elements in reverse order of opening, otherwise the tags are mis-nested
    return "<pre><code>" + swift.highlight(code) + "</code></pre>"
}
/**
 Handle raw HTML embedded in the markdown.

 The HTML is scanned for referenced resources and then passed through unchanged.

 - Parameter html: The HTML found in the markdown
 - Returns: The unmodified `html`
 */
private func handleHTML(html: String, _: Substring) -> String {
    // Only inspect the snippet, it is never altered
    findResourcesInHtml(html: html)
    return html
}
/**
 Scan an HTML snippet for images, links and `<source>` elements, in this order.

 - Parameter html: The HTML to scan
 */
private func findResourcesInHtml(html: String) {
    let scanners = [findImages(in:), findLinks(in:), findSourceSets(in:)]
    for scan in scanners {
        scan(html)
    }
}
/**
 Report all images referenced in an HTML snippet.

 A warning is added to the results for each `<img>` element with a non-blank `src` attribute.
 Errors while parsing are only printed.

 - Parameter markdown: The HTML to scan
 */
private func findImages(in markdown: String) {
    do {
        let images = try SwiftSoup.parse(markdown).select("img").array()
        // Collect all sources before reporting, so that an error produces no partial output
        let sources = try images.compactMap { try $0.attr("src") }
        for source in sources where !source.trimmed.isEmpty {
            results.warning("Found image in html: \(source)")
        }
    } catch {
        print("Error parsing HTML: \(error)")
    }
}
/**
 Report all links contained in an HTML snippet.

 The trimmed `href` attribute of each `<a>` element is inspected:
 Urls starting with `http://` or `https://` are registered as external links,
 all other non-empty values produce a warning.
 Errors while parsing are only printed.

 - Parameter markdown: The HTML to scan
 */
private func findLinks(in markdown: String) {
    do {
        let anchors = try SwiftSoup.parse(markdown).select("a").array()
        // Collect the targets of all links, ignoring elements without a url
        let targets = try anchors
            .compactMap { try $0.attr("href").trimmed }
            .filter { !$0.isEmpty }
        for target in targets {
            let isExternal = ["http://", "https://"].contains { target.hasPrefix($0) }
            guard isExternal else {
                results.warning("Relative link in HTML: \(target)")
                continue
            }
            results.externalLink(to: target)
        }
    } catch {
        print("Error parsing HTML: \(error)")
    }
}
/**
 Report all `<source>` elements contained in an HTML snippet.

 Each non-blank `srcset` attribute produces a warning.
 Each non-blank `src` attribute is treated as a file id: If the id is valid and the file exists,
 then the file is required for the page, otherwise a warning is added.
 Errors while parsing are only printed.

 - Parameter markdown: The HTML to scan
 */
private func findSourceSets(in markdown: String) {
    do {
        let sources = try SwiftSoup.parse(markdown).select("source").array()

        // Source sets are only reported
        let sourceSets = try sources
            .compactMap { try $0.attr("srcset") }
            .filter { !$0.trimmed.isEmpty }
        for sourceSet in sourceSets {
            results.warning("Found source set in html: \(sourceSet)")
        }

        // Single sources are resolved to files where possible
        let fileIds = try sources
            .compactMap { try $0.attr("src") }
            .filter { !$0.trimmed.isEmpty }
        for fileId in fileIds {
            guard content.isValidIdForFile(fileId), let file = content.file(fileId) else {
                results.warning("Found source in html: \(fileId)")
                continue
            }
            #warning("Either find files by their full path, or replace file id with full path")
            results.require(file: file)
        }
    } catch {
        print("Error parsing HTML: \(error)")
    }
}
/**
 Modify headlines by extracting an id from the headline and adding it to the html element.

 Format: `##<title>#<id>`

 The id is created from the text after the last `#` of the markdown by lowercasing it,
 removing all characters except letters, digits and spaces, and joining the words with hyphens.

 The html is returned unchanged if the resulting id is empty (an empty `id` attribute is invalid),
 or if the html contains no `>` and thus no opening tag to extend.

 - Parameter html: The HTML generated for the headline by the markdown parser
 - Parameter markdown: The markdown of the headline
 - Returns: The headline HTML with an `id` attribute added to the opening tag
 */
private func handleHeadlines(html: String, markdown: Substring) -> String {
    let id = markdown
        .last(after: "#")
        .trimmed
        .filter { $0.isNumber || $0.isLetter || $0 == " " }
        .lowercased()
        .components(separatedBy: " ")
        .filter { $0 != "" }
        .joined(separator: "-")
    guard !id.isEmpty, let endOfOpeningTag = html.firstIndex(of: ">") else {
        return html
    }
    // Insert the attribute just before the end of the opening tag
    var result = html
    result.insert(contentsOf: " id=\"\(id)\"", at: endOfOpeningTag)
    return result
}
/**
 Remove the percent encoding from a string.

 - Parameter string: The string to decode
 - Returns: The decoded string, or the unchanged input (after printing a message) if it contains invalid percent encoding
 */
private func percentDecoded(_ string: String) -> String {
    if let decoded = string.removingPercentEncoding {
        return decoded
    }
    print("Invalid string: \(string)")
    return string
}
/**
 Process markdown image syntax, which is used to embed commands.

 Format: `![<command>](<argument1>;<argument2>;...)`

 The command and the argument list are percent-decoded, and the arguments are split at each `;`.
 An empty command is treated as an image.
 An unknown command is reported as invalid, and the HTML of the markdown parser is kept.

 - Parameter html: The HTML generated by the markdown parser
 - Parameter markdown: The markdown of the image element
 - Returns: The HTML produced by the command
 */
private func processMarkdownImage(html: String, markdown: Substring) -> String {
    let decodedArguments = percentDecoded(markdown.between(first: "](", andLast: ")"))
    let arguments = decodedArguments.components(separatedBy: ";")
    let key = percentDecoded(markdown.between("![", and: "]").trimmed)

    if key.isEmpty {
        // Standard markdown image without a command
        return handleImage(arguments, markdown: markdown)
    }
    guard let command = ShorthandMarkdownKey(rawValue: key) else {
        // Unknown commands keep the default output of the markdown parser
        results.invalid(command: nil, markdown)
        return html
    }

    switch command {
    case .image: return handleImage(arguments, markdown: markdown)
    case .labels: return labelHandler.process(arguments, markdown: markdown)
    case .buttons: return buttonHandler.process(arguments, markdown: markdown)
    case .video: return handleVideo(arguments, markdown: markdown)
    case .pageLink: return handlePageLink(arguments, markdown: markdown)
    case .includedHtml: return handleExternalHtml(arguments, markdown: markdown)
    case .box: return handleSimpleBox(arguments, markdown: markdown)
    case .model: return handleModel(arguments, markdown: markdown)
    case .svg: return handleSvg(arguments, markdown: markdown)
    case .audioPlayer: return audioPlayer.process(arguments, markdown: markdown)
    case .tagLink: return handleTagLink(arguments, markdown: markdown)
    case .icons: return icons.process(arguments, markdown: markdown)
    }
}
/**
 Create the HTML for an image with an optional caption.

 Format: `![image](<imageId>;<caption?>)`

 SVG images are embedded directly. For all other images, a thumbnail and a large version
 are created, and the image sets for both sizes are required.

 - Parameter arguments: The image id, optionally followed by the caption
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the image, or an empty string if the command is invalid or the image is missing
 */
private func handleImage(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count == 1 || arguments.count == 2 else {
        results.invalid(command: .image, markdown)
        return ""
    }
    let imageId = arguments[0]
    guard let image = content.image(imageId) else {
        results.missing(file: imageId, source: "Image command")
        return ""
    }
    results.used(file: image)

    // The caption is the optional second argument
    let caption = arguments.dropFirst().first
    let altText = image.localized(in: language)
    let path = image.absoluteUrl

    if image.type.isSvg {
        return SvgImage(imagePath: path, altText: altText).content
    }

    /// Create a square version of the image and require the matching image set
    func makeVersion(size: Int) -> FeedEntryData.Image {
        let version = FeedEntryData.Image(
            rawImagePath: path,
            width: size,
            height: size,
            altText: altText)
        results.requireImageSet(for: image, size: size)
        return version
    }

    let thumbnail = makeVersion(size: thumbnailWidth)
    let largeImage = makeVersion(size: largeImageWidth)

    let pageImage = PageImage(
        imageId: imageId.replacingOccurrences(of: ".", with: "-"),
        thumbnail: thumbnail,
        largeImage: largeImage,
        caption: caption)
    return pageImage.content
}
/**
 Create the HTML for a video.

 Format: `![video](<fileId>;<option1...>)`

 The options are converted (and reported) before the video file is looked up.

 - Parameter arguments: The file id of the video, followed by any number of video options
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the video, or an empty string if the command is invalid or the file is missing
 */
private func handleVideo(_ arguments: [String], markdown: Substring) -> String {
    guard let firstArgument = arguments.first else {
        results.invalid(command: .video, markdown)
        return ""
    }
    let fileId = firstArgument.trimmed
    let options = arguments
        .dropFirst()
        .compactMap { convertVideoOption($0, markdown: markdown) }

    guard let file = content.file(fileId) else {
        results.missing(file: fileId, source: "Video command")
        return ""
    }
    #warning("Create/specify video alternatives")
    results.require(file: file)

    // Files without a matching HTML video type can't be embedded
    guard let videoType = file.type.htmlType else {
        results.invalid(command: .video, markdown)
        return ""
    }
    let video = ContentPageVideo(
        filePath: file.absoluteUrl,
        videoType: videoType,
        options: options)
    return video.content
}
/**
 Convert the text of a video option to the option inserted into the video element.

 Blank texts are skipped silently, unknown options are reported as an invalid video command.
 For `poster` and `src` options, the referenced image or video id is replaced by the url of the file,
 and the file is marked as used. All other options are returned unchanged.

 - Parameter videoOption: The raw text of the option
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The converted option, or `nil` if the option is empty, invalid, or references a missing file
 */
private func convertVideoOption(_ videoOption: String, markdown: Substring) -> VideoOption? {
    guard let optionText = videoOption.trimmed.nonEmpty else {
        return nil
    }
    guard let option = VideoOption(rawValue: optionText) else {
        results.invalid(command: .video, markdown)
        return nil
    }

    switch option {
    case .poster(let imageId):
        guard let image = content.image(imageId) else {
            // Image file not present, so skip the option
            results.missing(file: imageId, source: "Video command poster")
            return nil
        }
        results.used(file: image)
        // The poster uses twice the thumbnail size
        let size = 2 * thumbnailWidth
        let posterPath = WebsiteImage.imagePath(source: image.absoluteUrl, width: size, height: size)
        return .poster(image: posterPath)
    case .src(let videoId):
        guard let video = content.video(videoId) else {
            // Video file not present, so skip the option
            results.missing(file: videoId, source: "Video command source")
            return nil
        }
        results.used(file: video)
        return .src(video.absoluteUrl)
    default:
        return option
    }
}
/**
 Insert the content of an HTML file into the page.

 Format: `![html](<fileId>)`

 The included HTML is scanned for referenced resources and inserted unchanged.

 - Parameter arguments: The id of the file to include
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The text content of the file, or an empty string if the command is invalid or the file is missing
 */
private func handleExternalHtml(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count == 1, let fileId = arguments.first else {
        results.invalid(command: .includedHtml, markdown)
        return ""
    }
    guard let file = content.file(fileId) else {
        results.missing(file: fileId, source: "External HTML command")
        return ""
    }
    let includedHtml = file.textContent()
    findResourcesInHtml(html: includedHtml)
    return includedHtml
}
/**
 Create a box with a title and a text.

 Format: `![box](<title>;<body>)`

 - Parameter arguments: The title, followed by the body. Additional arguments are treated as part of the body.
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the box, or an empty string if less than two arguments are given
 */
private func handleSimpleBox(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count >= 2 else {
        results.invalid(command: .box, markdown)
        return ""
    }
    // Restore the semicolons of the body that were removed when splitting the arguments
    let body = arguments[1...].joined(separator: ";")
    let box = ContentBox(title: arguments[0], text: body)
    return box.content
}
/**
 Create a preview box linking to a page.

 Format: `![page](<pageId>)`

 Links to draft pages produce no output. The title falls back to the page title
 if no link preview title is set.

 - Parameter arguments: The id of the page to link
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the link box, or an empty string if the command is invalid, the page is missing, or the page is a draft
 */
private func handlePageLink(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count == 1, let pageId = arguments.first else {
        results.invalid(command: .pageLink, markdown)
        return ""
    }
    guard let page = content.page(pageId) else {
        results.missing(page: pageId, source: "Page link command")
        return ""
    }
    if page.isDraft {
        // Prevent linking to unpublished content
        return ""
    }
    results.linked(to: page)

    let localized = page.localized(in: language)
    let url = page.absoluteUrl(in: language)
    let title = localized.linkPreviewTitle ?? localized.title
    let description = localized.linkPreviewDescription ?? ""

    // The preview image is optional
    let previewImage: RelatedPageLink.Image?
    if let image = localized.linkPreviewImage {
        let size = content.settings.pages.pageLinkImageSize
        results.used(file: image)
        results.requireImageSet(for: image, size: size)
        previewImage = RelatedPageLink.Image(
            url: image.absoluteUrl,
            description: image.localized(in: language),
            size: size)
    } else {
        previewImage = nil
    }

    let link = RelatedPageLink(
        title: title,
        description: description,
        url: url,
        image: previewImage)
    return link.content
}
/**
 Create a preview box linking to a tag.

 Format: `![tag](<tagId>)`

 The box shows the localized name and description of the tag, and the link preview image if one is set.

 - Parameter arguments: The id of the tag to link
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the link box, or an empty string if the command is invalid or the tag is missing
 */
private func handleTagLink(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count == 1 else {
        results.invalid(command: .tagLink, markdown)
        return ""
    }
    let tagId = arguments[0]
    guard let tag = content.tag(tagId) else {
        results.missing(tag: tagId, source: "Tag link command")
        return ""
    }
    let localized = tag.localized(in: language)
    let url = tag.absoluteUrl(in: language)
    let title = localized.name
    let description = localized.description ?? ""
    let image = localized.linkPreviewImage.map { image in
        let size = content.settings.pages.pageLinkImageSize
        // Register the preview image as used, in the same way as the page link command
        results.used(file: image)
        results.requireImageSet(for: image, size: size)
        return RelatedPageLink.Image(
            url: image.absoluteUrl,
            description: image.localized(in: language),
            size: size)
    }
    return RelatedPageLink(
        title: title,
        description: description,
        url: url,
        image: image)
    .content
}
/**
 Create a viewer for a 3D model.

 Format: `![model](<file>)`

 Only files with the suffix `.glb` are accepted. The file and the model viewer header are required for the page.

 - Parameter arguments: The id of the model file
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the model viewer, or an empty string if the command is invalid or the file is missing
 */
private func handleModel(_ arguments: [String], markdown: Substring) -> String {
    // Exactly one argument naming a `.glb` file is required
    guard arguments.count == 1, arguments[0].hasSuffix(".glb") else {
        results.invalid(command: .model, markdown)
        return ""
    }
    let fileId = arguments[0]
    guard let file = content.file(fileId) else {
        results.missing(file: fileId, source: "Model command")
        return ""
    }
    results.require(file: file)
    results.require(header: .modelViewer)

    let viewer = ModelViewer(
        file: file.absoluteUrl,
        description: file.localized(in: language))
    return viewer.content
}
/**
 Show a rectangular part of an SVG image.

 Arguments: `<imageId>;<x>;<y>;<width>;<height>`

 All four numbers must be integers without surrounding whitespace, and the image must be an SVG.

 - Parameter arguments: The image id, followed by the origin and size of the part to show
 - Parameter markdown: The markdown of the command, used for error reporting
 - Returns: The HTML of the partial image, or an empty string if the command is invalid or the image is missing
 */
private func handleSvg(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count == 5 else {
        results.invalid(command: .svg, markdown)
        return ""
    }
    guard let x = Int(arguments[1]),
          let y = Int(arguments[2]),
          let partWidth = Int(arguments[3]),
          let partHeight = Int(arguments[4]) else {
        results.invalid(command: .svg, markdown)
        return ""
    }
    let imageId = arguments[0]
    guard let image = content.image(imageId) else {
        results.missing(file: imageId, source: "SVG command")
        return ""
    }
    guard image.type.isSvg else {
        results.invalid(command: .svg, markdown)
        return ""
    }
    // Register the image as used, in the same way as SVG images of the image command
    results.used(file: image)
    return PartialSvgImage(
        imagePath: image.absoluteUrl,
        altText: image.localized(in: language),
        x: x,
        y: y,
        width: partWidth,
        height: partHeight)
    .content
}
}
/*
private func handleGif(file: String, altText: String) -> String {
let imagePath = page.pathRelativeToRootForContainedInputFile(file)
results.require(file: imagePath, source: page.path)
guard let size = results.getImageSize(atPath: imagePath, source: page.path) else {
return ""
}
let width = Int(size.width)
let height = Int(size.height)
return factory.html.image(file: file, width: width, height: height, altText: altText)
}
*/