ChWebsiteApp/CHDataManagement/Generator/PageContentGenerator.swift
2024-12-14 16:31:40 +01:00

517 lines
18 KiB
Swift

import Foundation
import Ink
import Splash
import SwiftSoup
/// A video source, pairing the url of a video with its file type.
typealias VideoSource = (url: String, type: VideoFileType)
/// Converts the markdown content of a page to HTML,
/// and collects the resources, links and errors encountered while doing so.
final class PageContentParser {
/// The prefix of a link target which references a page by its id
private let pageLinkMarker = "page:"
/// The prefix of a link target which references a tag by its id
private let tagLinkMarker = "tag:"
/// The footer requested for pages with code blocks that are not highlighted using Splash
private static let codeHighlightFooter = "<script>hljs.highlightAll();</script>"
/// The highlighter for Swift code blocks, producing HTML output
private let swift = SyntaxHighlighter(format: HTMLOutputFormat())
/// The files, links, images, headers, footers and errors collected while generating a page
let results = PageGenerationResults()
/// The content used to look up pages, tags, files and settings
private let content: Content
/// The processor for the `buttons` command
private let buttonHandler: ButtonCommandProcessor
/// The processor for the `labels` command
private let labelHandler: LabelsCommandProcessor
/// The processor for the `audioPlayer` command
private let audioPlayer: AudioPlayerCommandProcessor
/// The language for which urls, titles and descriptions are generated
let language: ContentLanguage
/// The size used for the large version of page images, taken from the page settings.
var largeImageWidth: Int {
    let pageSettings = content.settings.pages
    return pageSettings.largeImageWidth
}
/// The size used for image thumbnails, which is the content width from the page settings.
var thumbnailWidth: Int {
    let pageSettings = content.settings.pages
    return pageSettings.contentWidth
}
/// Create a parser for page content.
/// - Parameter content: The content used to resolve pages, tags, files and settings
/// - Parameter language: The language for which the page is generated
init(content: Content, language: ContentLanguage) {
    self.content = content
    self.language = language
    // All command processors report into the results of this parser
    buttonHandler = ButtonCommandProcessor(content: content, results: results)
    labelHandler = LabelsCommandProcessor(content: content, results: results)
    audioPlayer = AudioPlayerCommandProcessor(content: content, results: results)
}
/// Request an image set from the generator for every image collected in the results.
///
/// The requested size is used for both the maximum width and the maximum height.
/// - Parameter generator: The generator to receive the requests
func requestImages(_ generator: ImageGenerator) {
    results.imagesToGenerate.forEach { request in
        let size = CGFloat(request.size)
        generator.generateImageSet(
            for: request.image.id,
            maxWidth: size,
            maxHeight: size)
    }
}
/// Reset the results collected by this parser.
func reset() {
    self.results.reset()
}
/// Convert the markdown content of a page to HTML.
///
/// The results are reset before parsing, so they only describe the most recently generated page.
/// - Parameter content: The markdown content of the page
/// - Returns: The generated HTML
func generatePage(from content: String) -> String {
    reset()
    // Each modifier hooks one kind of markdown element into the handlers of this parser
    let modifiers = [
        Modifier(target: .images, closure: processMarkdownImage),
        Modifier(target: .codeBlocks, closure: handleCode),
        Modifier(target: .links, closure: handleLink),
        Modifier(target: .html, closure: handleHTML),
        Modifier(target: .headings, closure: handleHeadlines)
    ]
    let parser = MarkdownParser(modifiers: modifiers)
    return parser.html(from: content)
}
/// Convert a markdown code block to HTML.
///
/// Swift code blocks are highlighted using Splash.
/// All other code blocks keep the HTML produced by the markdown parser,
/// and the code highlighting header and footer are requested for the page.
/// - Parameter html: The HTML generated by the markdown parser
/// - Parameter markdown: The markdown of the code block
/// - Returns: The HTML to insert for the code block
private func handleCode(html: String, markdown: Substring) -> String {
    guard markdown.starts(with: "```swift") else {
        results.requiredHeaders.insert(.codeHightlighting)
        results.requiredFooters.insert(PageContentParser.codeHighlightFooter)
        return html // Just use normal code highlighting
    }
    // Highlight swift code using Splash
    let code = markdown.between("```swift", and: "```").trimmed
    // Close the elements in the reverse order of opening, to produce properly nested HTML
    return "<pre><code>" + swift.highlight(code) + "</code></pre>"
}
/// Process a markdown link.
///
/// Links starting with the page or tag marker are resolved to the url of the page or tag.
/// All other links are recorded as external links and keep the generated HTML.
/// - Parameter html: The HTML generated by the markdown parser
/// - Parameter markdown: The markdown of the link
/// - Returns: The HTML to insert for the link
private func handleLink(html: String, markdown: Substring) -> String {
    let target = markdown.between("(", and: ")")
    let isPageLink = target.hasPrefix(pageLinkMarker)
    if isPageLink {
        return handlePageLink(file: target, html: html, markdown: markdown)
    }
    guard target.hasPrefix(tagLinkMarker) else {
        // Neither a page nor a tag link
        results.externalLinks.insert(target)
        return html
    }
    return handleTagLink(file: target, html: html, markdown: markdown)
}
/// Resolve a link to a page (`page:<pageId>`) to the url of the page.
///
/// Only the part of the target before a `#` is replaced (presumably, based on `dropAfterFirst`),
/// so that links to elements within the page are retained.
/// If the page does not exist, it is recorded as missing and only the link text is returned.
/// - Parameter file: The target of the link, starting with the page link marker
/// - Parameter html: The HTML generated by the markdown parser
/// - Parameter markdown: The markdown of the link
/// - Returns: The HTML with the resolved page url, or the plain link text
private func handlePageLink(file: String, html: String, markdown: Substring) -> String {
    let linkWithoutFragment = file.dropAfterFirst("#")
    let pageId = linkWithoutFragment.replacingOccurrences(of: pageLinkMarker, with: "")
    if let page = content.page(pageId) {
        results.linkedPages.insert(page)
        let url = page.absoluteUrl(for: language)
        return html.replacingOccurrences(of: linkWithoutFragment, with: url)
    }
    results.missing(page: pageId, markdown: markdown)
    // The page can't be found, so only the text of the link remains
    return markdown.between("[", and: "]")
}
/// Resolve a link to a tag (`tag:<tagId>`) to the url of the tag.
///
/// Only the part of the target before a `#` is replaced (presumably, based on `dropAfterFirst`),
/// so that a fragment in the link is retained.
/// If the tag does not exist, it is recorded as missing and only the link text is returned.
/// - Parameter file: The target of the link, starting with the tag link marker
/// - Parameter html: The HTML generated by the markdown parser
/// - Parameter markdown: The markdown of the link
/// - Returns: The HTML with the resolved tag url, or the plain link text
private func handleTagLink(file: String, html: String, markdown: Substring) -> String {
    let linkWithoutFragment = file.dropAfterFirst("#")
    let tagId = linkWithoutFragment.replacingOccurrences(of: tagLinkMarker, with: "")
    if let tag = content.tag(tagId) {
        results.linkedTags.insert(tag)
        let url = content.absoluteUrlToTag(tag, language: language)
        return html.replacingOccurrences(of: linkWithoutFragment, with: url)
    }
    results.missing(tag: tagId, markdown: markdown)
    // The tag can't be found, so only the text of the link remains
    return markdown.between("[", and: "]")
}
/// Process raw HTML contained in the markdown.
///
/// The HTML is passed through unchanged. It is only inspected for images, links and source sets.
/// - Parameter markdown: The raw HTML found in the markdown
/// - Returns: The unchanged HTML
private func handleHTML(_: String, markdown: Substring) -> String {
    let rawHtml = String(markdown)
    #warning("Check HTML code in markdown for required resources")
    // Things to check: <img src= <a href= <source>
    let checks: [(String) -> Void] = [
        findImages(in:),
        findLinks(in:),
        findSourceSets(in:)
    ]
    for check in checks {
        check(rawHtml)
    }
    return rawHtml
}
/// Search HTML for `img` elements and print the `src` attribute of each one.
///
/// Elements without a `src` attribute are skipped. Parsing errors are printed and otherwise ignored.
/// - Parameter markdown: The raw HTML found in the markdown
private func findImages(in markdown: String) {
    do {
        let document = try SwiftSoup.parse(markdown)
        let images = try document.select("img")
        // `attr` returns an empty string (not nil) for a missing attribute,
        // so empty values are filtered explicitly
        let sources = try images.array()
            .map { try $0.attr("src") }
            .filter { !$0.isEmpty }
        for source in sources {
            print("Found image in html: \(source)")
        }
    } catch {
        print("Error parsing HTML: \(error)")
    }
}
/// Search HTML for `a` elements and print the `href` attribute of each one.
///
/// Elements without a `href` attribute are skipped. Parsing errors are printed and otherwise ignored.
/// - Parameter markdown: The raw HTML found in the markdown
private func findLinks(in markdown: String) {
    do {
        let document = try SwiftSoup.parse(markdown)
        let anchors = try document.select("a")
        // `attr` returns an empty string (not nil) for a missing attribute,
        // so empty values are filtered explicitly
        let targets = try anchors.array()
            .map { try $0.attr("href") }
            .filter { !$0.isEmpty }
        for target in targets {
            print("Found link in html: \(target)")
        }
    } catch {
        print("Error parsing HTML: \(error)")
    }
}
/// Search HTML for `source` elements and print the `srcset` attribute of each one.
///
/// Elements without a `srcset` attribute are skipped. Parsing errors are printed and otherwise ignored.
/// - Parameter markdown: The raw HTML found in the markdown
private func findSourceSets(in markdown: String) {
    do {
        let document = try SwiftSoup.parse(markdown)
        let sources = try document.select("source")
        // `attr` returns an empty string (not nil) for a missing attribute,
        // so empty values are filtered explicitly
        let sourceSets = try sources.array()
            .map { try $0.attr("srcset") }
            .filter { !$0.isEmpty }
        for sourceSet in sourceSets {
            print("Found source set in html: \(sourceSet)")
        }
    } catch {
        print("Error parsing HTML: \(error)")
    }
}
/**
 Add an id to the html element of a headline.

 Format: `##<title>#<id>`

 The id is derived from the markdown text selected by `last(after: "#")`:
 all characters except letters, numbers and spaces are removed, the text is lowercased,
 and the remaining words are joined with hyphens.
 The `id` attribute is inserted before the first `>` of the generated html.
 */
private func handleHeadlines(html: String, markdown: Substring) -> String {
    let words = markdown
        .last(after: "#")
        .trimmed
        .filter { $0.isLetter || $0.isNumber || $0 == " " }
        .lowercased()
        .components(separatedBy: " ")
        .filter { !$0.isEmpty }
    let attribute = " id=\"\(words.joined(separator: "-"))\">"
    guard let tagEnd = html.firstIndex(of: ">") else {
        // No closing bracket in the html, so the attribute is simply appended
        return html + attribute
    }
    let openingTag = String(html[..<tagEnd])
    let remainder = String(html[html.index(after: tagEnd)...])
    return openingTag + attribute + remainder
}
/// Remove the percent encoding from a string.
///
/// - Parameter string: The string to decode
/// - Returns: The decoded string, or the unchanged input (after printing a message) if decoding fails
private func percentDecoded(_ string: String) -> String {
    if let decoded = string.removingPercentEncoding {
        return decoded
    }
    print("Invalid string: \(string)")
    return string
}
/// Process a markdown image element, which is also used as the shorthand for commands.
///
/// Format: `![<command>](<argument1>;<argument2>;...)`
///
/// The command and the arguments are percent-decoded, and the arguments are split at each `;`.
/// An empty command is treated as an image. Unknown commands are recorded as invalid
/// and keep the HTML generated by the markdown parser.
/// - Parameter html: The HTML generated by the markdown parser
/// - Parameter markdown: The markdown of the image element
/// - Returns: The HTML to insert for the element
private func processMarkdownImage(html: String, markdown: Substring) -> String {
    let argumentText = markdown.between(first: "](", andLast: ")")
    let arguments = percentDecoded(argumentText).components(separatedBy: ";")
    let commandText = percentDecoded(markdown.between("![", and: "]").trimmed)
    if commandText.isEmpty {
        // No command means a plain image
        return handleImage(arguments, markdown: markdown)
    }
    guard let command = ShorthandMarkdownKey(rawValue: commandText) else {
        // Treat unknown commands as normal links
        results.invalid(command: nil, markdown)
        return html
    }
    switch command {
    case .image:
        return handleImage(arguments, markdown: markdown)
    case .labels:
        return labelHandler.process(arguments, markdown: markdown)
    case .buttons:
        return buttonHandler.process(arguments, markdown: markdown)
    case .video:
        return handleVideo(arguments, markdown: markdown)
    case .pageLink:
        return handlePageLink(arguments, markdown: markdown)
    case .includedHtml:
        return handleExternalHtml(arguments, markdown: markdown)
    case .box:
        return handleSimpleBox(arguments, markdown: markdown)
    case .model:
        return handleModel(arguments, markdown: markdown)
    case .svg:
        return handleSvg(arguments, markdown: markdown)
    case .audioPlayer:
        return audioPlayer.process(arguments, markdown: markdown)
    default:
        // Known commands without a handler produce no output
        results.invalid(command: nil, markdown)
        return ""
    }
}
/**
 Insert an image with an optional caption.

 Format: `![image](<imageId>;<caption?>)`

 SVG images are inserted directly. For all other images, a thumbnail and a large version
 are requested for generation.
 */
private func handleImage(_ arguments: [String], markdown: Substring) -> String {
    if arguments.isEmpty || arguments.count > 2 {
        results.invalid(command: .image, markdown)
        return ""
    }
    let imageId = arguments[0]
    guard let image = content.image(imageId) else {
        results.missing(file: imageId, markdown: markdown)
        return ""
    }
    results.files.insert(image)
    // The caption is the optional second argument
    let caption = arguments.dropFirst().first
    let altText = image.getDescription(for: language)
    let path = image.absoluteUrl
    if image.type.isSvg {
        return SvgImage(imagePath: path, altText: altText).content
    }

    // Creates a square version of the image, and requests its generation
    func version(size: Int) -> FeedEntryData.Image {
        let version = FeedEntryData.Image(
            rawImagePath: path,
            width: size,
            height: size,
            altText: altText)
        results.imagesToGenerate.insert(.init(size: size, image: image))
        return version
    }

    let thumbnail = version(size: thumbnailWidth)
    let largeImage = version(size: largeImageWidth)
    return PageImage(
        imageId: imageId.replacingOccurrences(of: ".", with: "-"),
        thumbnail: thumbnail,
        largeImage: largeImage,
        caption: caption).content
}
/**
 Insert a video.

 Format: `![video](<fileId>;<option1...>)`

 The options are converted before the file is looked up,
 so invalid options are reported even if the video file is missing.
 */
private func handleVideo(_ arguments: [String], markdown: Substring) -> String {
    guard let firstArgument = arguments.first else {
        results.invalid(command: .video, markdown)
        return ""
    }
    let fileId = firstArgument.trimmed
    let options = arguments
        .dropFirst()
        .compactMap { convertVideoOption($0, markdown: markdown) }
    guard let file = content.file(fileId) else {
        results.missing(file: fileId, markdown: markdown)
        return ""
    }
    results.files.insert(file)
    // Only files with a video type that has an html type can be embedded
    guard let videoType = file.type.videoType?.htmlType else {
        results.invalid(command: .video, markdown)
        return ""
    }
    let video = ContentPageVideo(
        filePath: file.absoluteUrl,
        videoType: videoType,
        options: options)
    return video.content
}
/// Convert the text of a video option to the option used for the video element.
///
/// Poster images and video sources are resolved to the paths of the referenced files,
/// which are added to the required files. All other options are returned unchanged.
/// - Parameter videoOption: The raw text of the option
/// - Parameter markdown: The markdown of the video command, used for error reporting
/// - Returns: The converted option, or `nil` if the text is empty, invalid, or references a missing file
private func convertVideoOption(_ videoOption: String, markdown: Substring) -> VideoOption? {
    guard let optionText = videoOption.trimmed.nonEmpty else {
        return nil
    }
    guard let option = VideoOption(rawValue: optionText) else {
        results.invalid(command: .video, markdown)
        return nil
    }
    switch option {
    case .poster(let imageId):
        guard let image = content.image(imageId) else {
            results.missing(file: imageId, markdown: markdown)
            return nil // Image file not present, so skip the option
        }
        results.files.insert(image)
        // The poster uses twice the thumbnail width for both dimensions
        let size = 2 * thumbnailWidth
        let posterPath = WebsiteImage.imagePath(source: image.absoluteUrl, width: size, height: size)
        return .poster(image: posterPath)
    case .src(let videoId):
        guard let video = content.video(videoId) else {
            results.missing(file: videoId, markdown: markdown)
            return nil // Video file not present, so skip the option
        }
        results.files.insert(video)
        // TODO: Set correct video path?
        return .src(video.absoluteUrl)
    default:
        return option
    }
}
/**
 Insert the text content of a file directly into the page.

 Format: `![html](<fileId>)`
 */
private func handleExternalHtml(_ arguments: [String], markdown: Substring) -> String {
    if arguments.count != 1 {
        results.invalid(command: .includedHtml, markdown)
        return ""
    }
    let fileId = arguments[0]
    if let file = content.file(fileId) {
        return file.textContent()
    }
    results.missing(file: fileId, markdown: markdown)
    return ""
}
/**
 Insert a box with a title and a text.

 Format: `![box](<title>;<body>)`

 The body may contain `;`, since all arguments after the title are joined again.
 */
private func handleSimpleBox(_ arguments: [String], markdown: Substring) -> String {
    if arguments.count < 2 {
        results.invalid(command: .box, markdown)
        return ""
    }
    let body = arguments[1...].joined(separator: ";")
    let box = ContentBox(title: arguments[0], text: body)
    return box.content
}
/**
 Insert a preview link to another page.

 Format: `![page](<pageId>)`

 Draft pages produce no output. If the page has a link preview image,
 the image is added to the required files and requested for generation.
 */
private func handlePageLink(_ arguments: [String], markdown: Substring) -> String {
    if arguments.count != 1 {
        results.invalid(command: .pageLink, markdown)
        return ""
    }
    let pageId = arguments[0]
    guard let page = content.page(pageId) else {
        results.missing(page: pageId, markdown: markdown)
        return ""
    }
    if page.isDraft {
        // Prevent linking to unpublished content
        return ""
    }
    let localized = page.localized(in: language)
    let url = page.absoluteUrl(for: language)
    let title = localized.linkPreviewTitle ?? localized.title
    let description = localized.linkPreviewDescription ?? ""

    var previewImage: RelatedPageLink.Image? = nil
    if let image = localized.linkPreviewImage {
        let size = content.settings.pages.pageLinkImageSize
        results.files.insert(image)
        results.imagesToGenerate.insert(.init(size: size, image: image))
        previewImage = RelatedPageLink.Image(
            url: image.absoluteUrl,
            description: image.getDescription(for: language),
            size: size)
    }

    let link = RelatedPageLink(
        title: title,
        description: description,
        url: url,
        image: previewImage)
    return link.content
}
/**
 Insert a viewer for a 3D model.

 Format: `![model](<file>)`

 Only files with the `.glb` suffix are accepted.
 The model viewer header is requested for the page.
 */
private func handleModel(_ arguments: [String], markdown: Substring) -> String {
    // Exactly one argument is required, which must be a `.glb` file
    guard arguments.count == 1, arguments[0].hasSuffix(".glb") else {
        results.invalid(command: .model, markdown)
        return ""
    }
    let fileId = arguments[0]
    guard let file = content.file(fileId) else {
        results.missing(file: fileId, markdown: markdown)
        return ""
    }
    results.files.insert(file)
    results.requiredHeaders.insert(.modelViewer)
    let viewer = ModelViewer(
        file: file.absoluteUrl,
        description: file.getDescription(for: language))
    return viewer.content
}
/**
 Insert a part of an SVG image.

 Arguments: `<imageId>;<x>;<y>;<width>;<height>`

 The position and size must be integers, and the image must be an SVG.
 The image is added to the required files, like for the other commands referencing files.
 */
private func handleSvg(_ arguments: [String], markdown: Substring) -> String {
    guard arguments.count == 5 else {
        results.invalid(command: .svg, markdown)
        return ""
    }
    guard let x = Int(arguments[1]),
          let y = Int(arguments[2]),
          let partWidth = Int(arguments[3]),
          let partHeight = Int(arguments[4]) else {
        results.invalid(command: .svg, markdown)
        return ""
    }
    let imageId = arguments[0]
    guard let image = content.image(imageId) else {
        results.missing(file: imageId, markdown: markdown)
        return ""
    }
    guard case .image(let imageType) = image.type,
          imageType == .svg else {
        results.invalid(command: .svg, markdown)
        return ""
    }
    // The generated html references the image, so it must be tracked as a used file
    results.files.insert(image)
    return PartialSvgImage(
        imagePath: image.absoluteUrl,
        altText: image.getDescription(for: language),
        x: x,
        y: y,
        width: partWidth,
        height: partHeight)
    .content
}
}
/*
private func handleGif(file: String, altText: String) -> String {
let imagePath = page.pathRelativeToRootForContainedInputFile(file)
results.require(file: imagePath, source: page.path)
guard let size = results.getImageSize(atPath: imagePath, source: page.path) else {
return ""
}
let width = Int(size.width)
let height = Int(size.height)
return factory.html.image(file: file, width: width, height: height, altText: altText)
}
*/