import Foundation
import Ink
import Splash
import SwiftSoup

typealias VideoSource = (url: String, type: VideoFileType)

/**
 Converts the markdown content of a page into HTML.

 The parser installs modifiers for images, code blocks, links, raw HTML and headings
 on an Ink `MarkdownParser`. While converting, it records everything it encounters
 (used files, images to generate, linked pages and tags, external links, issues)
 in `results`.
 */
final class PageContentParser {

    /// Prefix of a markdown link target that refers to another page by its id.
    private let pageLinkMarker = "page:"

    /// Prefix of a markdown link target that refers to a tag by its id.
    private let tagLinkMarker = "tag:"

    /// Footer registered for pages containing non-Swift code blocks.
    // NOTE(review): this literal is empty in the reviewed source, so an empty string is
    // inserted into `requiredFooters` — confirm the intended footer content.
    private static let codeHighlightFooter = ""

    /// Splash highlighter used for Swift code blocks.
    private let swift = SyntaxHighlighter(format: HTMLOutputFormat())

    /// Everything collected while generating the current page.
    let results = PageGenerationResults()

    private let content: Content

    private let buttonHandler: ButtonCommandProcessor

    private let labelHandler: LabelsCommandProcessor

    private let audioPlayer: AudioPlayerCommandProcessor

    /// The language for which descriptions, titles and urls are resolved.
    let language: ContentLanguage

    /// The size used for the large version of page images.
    var largeImageWidth: Int {
        content.settings.pages.largeImageWidth
    }

    /// The size used for image thumbnails, taken from the page content width setting.
    var thumbnailWidth: Int {
        content.settings.pages.contentWidth
    }

    init(content: Content, language: ContentLanguage) {
        self.content = content
        self.language = language
        self.buttonHandler = .init(content: content, results: results)
        self.labelHandler = .init(content: content, results: results)
        self.audioPlayer = .init(content: content, results: results)
    }

    /**
     Ask the generator to create an image set for every image recorded in `results.imagesToGenerate`.

     The requested size is used as both the maximum width and the maximum height.
     */
    func requestImages(_ generator: ImageGenerator) {
        for request in results.imagesToGenerate {
            generator.generateImageSet(
                for: request.image.id,
                maxWidth: CGFloat(request.size),
                maxHeight: CGFloat(request.size))
        }
    }

    /// Clear all results collected so far.
    func reset() {
        results.reset()
    }

    /**
     Convert markdown to HTML.

     The previous results are reset before the conversion starts.
     - Parameter content: The markdown text of the page
     - Returns: The generated HTML
     */
    func generatePage(from content: String) -> String {
        reset()
        let parser = MarkdownParser(modifiers: [
            Modifier(target: .images, closure: processMarkdownImage),
            Modifier(target: .codeBlocks, closure: handleCode),
            Modifier(target: .links, closure: handleLink),
            Modifier(target: .html, closure: handleHTML),
            Modifier(target: .headings, closure: handleHeadlines)
        ])
        return parser.html(from: content)
    }

    /**
     Process a code block.

     Blocks starting with "```swift" are highlighted using Splash.
     All other blocks keep the generated HTML, and the code highlighting header and footer
     are registered as required for the page.
     */
    private func handleCode(html: String, markdown: Substring) -> String {
        guard markdown.starts(with: "```swift") else {
            results.requiredHeaders.insert(.codeHightlighting)
            results.requiredFooters.insert(PageContentParser.codeHighlightFooter)
            return html // Just use normal code highlighting
        }
        // Highlight swift code using Splash
        let code = markdown.between("```swift", and: "```").trimmed
        // NOTE(review): the strings surrounding the highlighted code appear as bare line breaks
        // in the reviewed source — confirm that no wrapping markup was lost.
        return "\n" + swift.highlight(code) + "\n"
    }

    /**
     Process a markdown link.

     Targets starting with `page:` or `tag:` are resolved to the url of the page or tag.
     Every other target is recorded in `results.externalLinks` and the HTML is kept unchanged.
     */
    private func handleLink(html: String, markdown: Substring) -> String {
        let file = markdown.between("(", and: ")")
        if file.hasPrefix(pageLinkMarker) {
            return handlePageLink(file: file, html: html, markdown: markdown)
        }

        if file.hasPrefix(tagLinkMarker) {
            return handleTagLink(file: file, html: html, markdown: markdown)
        }

        results.externalLinks.insert(file)
        return html
    }

    /**
     Replace a `page:<pageId>` link target with the url of the page.

     If the page doesn't exist, it is reported as missing and only the link text is returned.
     */
    private func handlePageLink(file: String, html: String, markdown: Substring) -> String {
        // Retain links pointing to elements within a page:
        // Only the part before a `#` is replaced, so a trailing anchor stays in the html
        let textToChange = file.dropAfterFirst("#")
        let pageId = textToChange.replacingOccurrences(of: pageLinkMarker, with: "")
        guard let page = content.page(pageId) else {
            results.missing(page: pageId, markdown: markdown)
            // Remove link since the page can't be found
            return markdown.between("[", and: "]")
        }
        results.linkedPages.insert(page)
        let pagePath = page.absoluteUrl(for: language)
        return html.replacingOccurrences(of: textToChange, with: pagePath)
    }

    /**
     Replace a `tag:<tagId>` link target with the url of the tag.

     If the tag doesn't exist, it is reported as missing and only the link text is returned.
     */
    private func handleTagLink(file: String, html: String, markdown: Substring) -> String {
        // Only the part before a `#` is replaced, so a trailing anchor stays in the html
        let textToChange = file.dropAfterFirst("#")
        let tagId = textToChange.replacingOccurrences(of: tagLinkMarker, with: "")
        guard let tag = content.tag(tagId) else {
            results.missing(tag: tagId, markdown: markdown)
            // Remove link since the tag can't be found
            return markdown.between("[", and: "]")
        }
        results.linkedTags.insert(tag)
        let tagPath = content.absoluteUrlToTag(tag, language: language)
        return html.replacingOccurrences(of: textToChange, with: tagPath)
    }

    /**
     Process raw HTML contained in the markdown.

     The HTML is returned unchanged, but it is scanned for images, links and sources,
     which are recorded in the results.
     */
    private func handleHTML(_: String, markdown: Substring) -> String {
        let result = String(markdown)
        findImages(in: result)
        findLinks(in: result)
        findSourceSets(in: result)
        return result
    }

    /// Add a warning for each `img` element with a non-empty `src` attribute.
    private func findImages(in markdown: String) {
        do {
            // Parse the HTML string
            let document = try SwiftSoup.parse(markdown)

            // Select all 'img' elements
            let imgElements = try document.select("img")

            // Extract the 'src' attributes from each 'img' element
            let srcAttributes = try imgElements.array()
                .compactMap { try $0.attr("src") }
                .filter { !$0.trimmed.isEmpty }

            for src in srcAttributes {
                results.issues.insert(.warning("Found image in html: \(src)"))
            }
        } catch {
            print("Error parsing HTML: \(error)")
        }
    }

    /**
     Inspect the `href` attributes of all `a` elements.

     Links starting with `http://` or `https://` are recorded as external links,
     all other non-empty links produce a warning.
     */
    private func findLinks(in markdown: String) {
        do {
            // Parse the HTML string
            let document = try SwiftSoup.parse(markdown)

            // Select all 'a' elements
            let linkElements = try document.select("a")

            // Extract the trimmed 'href' attributes from each 'a' element
            let hrefAttributes = try linkElements.array()
                .compactMap { try $0.attr("href").trimmed }
                .filter { !$0.isEmpty }

            for url in hrefAttributes {
                if url.hasPrefix("http://") || url.hasPrefix("https://") {
                    results.externalLinks.insert(url)
                } else {
                    results.issues.insert(.warning("Relative link in HTML: \(url)"))
                }
            }
        } catch {
            print("Error parsing HTML: \(error)")
        }
    }

    /**
     Inspect the `srcset` and `src` attributes of all `source` elements.

     Each non-empty `srcset` produces a warning.
     A `src` is added to the used files if it is a valid id of an existing file,
     and produces a warning otherwise.
     */
    private func findSourceSets(in markdown: String) {
        do {
            // Parse the HTML string
            let document = try SwiftSoup.parse(markdown)

            // Select all 'source' elements
            let sourceElements = try document.select("source")

            // Extract the 'srcset' attributes from each 'source' element
            let srcsetAttributes = try sourceElements.array()
                .compactMap { try $0.attr("srcset") }
                .filter { !$0.trimmed.isEmpty }

            for src in srcsetAttributes {
                results.issues.insert(.warning("Found source set in html: \(src)"))
            }

            // Extract the 'src' attributes from each 'source' element
            let srcAttributes = try sourceElements.array()
                .compactMap { try $0.attr("src") }
                .filter { !$0.trimmed.isEmpty }

            for src in srcAttributes {
                guard content.isValidIdForFile(src) else {
                    results.issues.insert(.warning("Found source in html: \(src)"))
                    continue
                }
                guard let file = content.file(src) else {
                    results.issues.insert(.warning("Found source in html: \(src)"))
                    continue
                }
                results.files.insert(file)
            }
        } catch {
            print("Error parsing HTML: \(error)")
        }
    }

    /**
     Modify headlines by creating an id from the headline text and adding it to the html element.

     The id is created from the text after the last `#` by removing everything except
     letters, numbers and spaces, lowercasing the remainder, and joining the words with dashes.

     The html is returned unchanged if no id can be created (e.g. a headline consisting
     only of special characters), or if the html contains no `>` to insert the id before.
     */
    private func handleHeadlines(html: String, markdown: Substring) -> String {
        let id = markdown
            .last(after: "#")
            .trimmed
            .filter { $0.isNumber || $0.isLetter || $0 == " " }
            .lowercased()
            .components(separatedBy: " ")
            .filter { $0 != "" }
            .joined(separator: "-")
        let parts = html.components(separatedBy: ">")
        // Never emit an empty id attribute, and don't append one to html without an opening tag
        guard !id.isEmpty, parts.count > 1 else {
            return html
        }
        // Insert the id before the first `>`, which closes the opening tag
        return parts[0] + " id=\"\(id)\">" + parts.dropFirst().joined(separator: ">")
    }

    /// Remove percent encoding from a string, or return the string unchanged if decoding fails.
    private func percentDecoded(_ string: String) -> String {
        guard let decoded = string.removingPercentEncoding else {
            print("Invalid string: \(string)")
            return string
        }
        return decoded
    }

    /**
     Process markdown image syntax, which is also used for shorthand commands.

     Format: `![<command>](<argument1>;<argument2>;...)`

     An empty command is treated as an image.
     Unknown commands are reported as invalid and keep the generated HTML.
     */
    private func processMarkdownImage(html: String, markdown: Substring) -> String {
        // The arguments are everything between the first `](` and the last `)`, separated by `;`
        let argumentList = percentDecoded(markdown.between(first: "](", andLast: ")"))
        let arguments = argumentList.components(separatedBy: ";")

        let rawCommand = percentDecoded(markdown.between("![", and: "]").trimmed)
        guard rawCommand != "" else {
            return handleImage(arguments, markdown: markdown)
        }

        guard let command = ShorthandMarkdownKey(rawValue: rawCommand) else {
            // Treat unknown commands as normal links
            results.invalid(command: nil, markdown)
            return html
        }

        switch command {
        case .image:
            return handleImage(arguments, markdown: markdown)
        case .labels:
            return labelHandler.process(arguments, markdown: markdown)
        case .buttons:
            return buttonHandler.process(arguments, markdown: markdown)
        case .video:
            return handleVideo(arguments, markdown: markdown)
        case .pageLink:
            return handlePageLink(arguments, markdown: markdown)
        case .includedHtml:
            return handleExternalHtml(arguments, markdown: markdown)
        case .box:
            return handleSimpleBox(arguments, markdown: markdown)
        case .model:
            return handleModel(arguments, markdown: markdown)
        case .svg:
            return handleSvg(arguments, markdown: markdown)
        case .audioPlayer:
            return audioPlayer.process(arguments, markdown: markdown)
        case .tagLink:
            return handleTagLink(arguments, markdown: markdown)
        }
    }

    /**
     Format: `![image](<imageId>;<caption?>)`

     SVG images are embedded directly. For all other images, a thumbnail
     and a large version are requested for generation.
     */
    private func handleImage(_ arguments: [String], markdown: Substring) -> String {
        guard (1...2).contains(arguments.count) else {
            results.invalid(command: .image, markdown)
            return ""
        }
        let imageId = arguments[0]

        guard let image = content.image(imageId) else {
            results.missing(file: imageId, markdown: markdown)
            return ""
        }
        results.files.insert(image)

        let caption = arguments.count == 2 ? arguments[1] : nil
        let altText = image.getDescription(for: language)

        let path = image.absoluteUrl

        guard !image.type.isSvg else {
            return SvgImage(imagePath: path, altText: altText).content
        }

        let thumbnail = FeedEntryData.Image(
            rawImagePath: path,
            width: thumbnailWidth,
            height: thumbnailWidth,
            altText: altText)
        results.imagesToGenerate.insert(.init(size: thumbnailWidth, image: image))

        let largeImage = FeedEntryData.Image(
            rawImagePath: path,
            width: largeImageWidth,
            height: largeImageWidth,
            altText: altText)
        results.imagesToGenerate.insert(.init(size: largeImageWidth, image: image))

        return PageImage(
            imageId: imageId.replacingOccurrences(of: ".", with: "-"),
            thumbnail: thumbnail,
            largeImage: largeImage,
            caption: caption).content
    }

    /**
     Format: `![video](<fileId>;<option1...>)`
     */
    private func handleVideo(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count >= 1 else {
            results.invalid(command: .video, markdown)
            return ""
        }
        let fileId = arguments[0].trimmed

        let options = arguments.dropFirst().compactMap { convertVideoOption($0, markdown: markdown) }

        guard let file = content.file(fileId) else {
            results.missing(file: fileId, markdown: markdown)
            return ""
        }
        results.files.insert(file)

        guard let videoType = file.type.videoType?.htmlType else {
            results.invalid(command: .video, markdown)
            return ""
        }

        return ContentPageVideo(
            filePath: file.absoluteUrl,
            videoType: videoType,
            options: options)
        .content
    }

    /**
     Convert the text of a single video option.

     Empty options are skipped, and invalid options are reported.
     The ids of poster images and video sources are replaced by the paths to the files,
     and the options are dropped if the files don't exist.
     */
    private func convertVideoOption(_ videoOption: String, markdown: Substring) -> VideoOption? {
        guard let optionText = videoOption.trimmed.nonEmpty else {
            return nil
        }
        guard let option = VideoOption(rawValue: optionText) else {
            results.invalid(command: .video, markdown)
            return nil
        }
        if case let .poster(imageId) = option {
            if let image = content.image(imageId) {
                results.files.insert(image)
                let width = 2*thumbnailWidth
                let fullLink = WebsiteImage.imagePath(source: image.absoluteUrl, width: width, height: width)
                return .poster(image: fullLink)
            } else {
                results.missing(file: imageId, markdown: markdown)
                return nil // Image file not present, so skip the option
            }
        }
        if case let .src(videoId) = option {
            if let video = content.video(videoId) {
                results.files.insert(video)
                let link = video.absoluteUrl
                // TODO: Set correct video path?
                return .src(link)
            } else {
                results.missing(file: videoId, markdown: markdown)
                return nil // Video file not present, so skip the option
            }
        }
        return option
    }

    /**
     Format: `![html](<fileId>)`

     The text content of the file is inserted into the page.
     */
    private func handleExternalHtml(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 1 else {
            results.invalid(command: .includedHtml, markdown)
            return ""
        }
        let fileId = arguments[0]
        guard let file = content.file(fileId) else {
            results.missing(file: fileId, markdown: markdown)
            return ""
        }
        return file.textContent()
    }

    /**
     Format: `![box](<title>;<body>)`

     The body may contain additional `;` characters.
     */
    private func handleSimpleBox(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count > 1 else {
            results.invalid(command: .box, markdown)
            return ""
        }
        let title = arguments[0]
        // Restore the separators that were removed when splitting the arguments
        let text = arguments.dropFirst().joined(separator: ";")
        return ContentBox(title: title, text: text).content
    }

    /**
     Format: `![page](<pageId>)`
     */
    private func handlePageLink(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 1 else {
            results.invalid(command: .pageLink, markdown)
            return ""
        }
        let pageId = arguments[0]

        guard let page = content.page(pageId) else {
            results.missing(page: pageId, markdown: markdown)
            return ""
        }
        guard !page.isDraft else {
            // Prevent linking to unpublished content
            return ""
        }

        let localized = page.localized(in: language)
        let url = page.absoluteUrl(for: language)
        let title = localized.linkPreviewTitle ?? localized.title
        let description = localized.linkPreviewDescription ?? ""

        let image = localized.linkPreviewImage.map { image in
            let size = content.settings.pages.pageLinkImageSize
            results.files.insert(image)
            results.imagesToGenerate.insert(.init(size: size, image: image))

            return RelatedPageLink.Image(
                url: image.absoluteUrl,
                description: image.getDescription(for: language),
                size: size)
        }

        return RelatedPageLink(
            title: title,
            description: description,
            url: url,
            image: image)
        .content
    }

    /**
     Format: `![tag](<tagId>)`
     */
    private func handleTagLink(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 1 else {
            results.invalid(command: .tagLink, markdown)
            return ""
        }
        let tagId = arguments[0]

        guard let tag = content.tag(tagId) else {
            results.missing(tag: tagId, markdown: markdown)
            return ""
        }

        let localized = tag.localized(in: language)
        let url = tag.absoluteUrl(for: language)
        let title = localized.name
        let description = localized.description ?? ""

        let image = localized.linkPreviewImage.map { image in
            let size = content.settings.pages.pageLinkImageSize
            results.files.insert(image)
            results.imagesToGenerate.insert(.init(size: size, image: image))

            return RelatedPageLink.Image(
                url: image.absoluteUrl,
                description: image.getDescription(for: language),
                size: size)
        }

        return RelatedPageLink(
            title: title,
            description: description,
            url: url,
            image: image)
        .content
    }

    /**
     Format: `![model](<file>)`

     Only files with the suffix `.glb` are accepted.
     */
    private func handleModel(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 1 else {
            results.invalid(command: .model, markdown)
            return ""
        }
        let fileId = arguments[0]
        guard fileId.hasSuffix(".glb") else {
            results.invalid(command: .model, markdown)
            return ""
        }

        guard let file = content.file(fileId) else {
            results.missing(file: fileId, markdown: markdown)
            return ""
        }
        results.files.insert(file)
        results.requiredHeaders.insert(.modelViewer)

        let description = file.getDescription(for: language)
        return ModelViewer(file: file.absoluteUrl, description: description).content
    }

    /**
     Show a part of an SVG image.

     Arguments: `<imageId>;<x>;<y>;<width>;<height>`

     All four numbers must be integers, and the image must be an SVG.
     */
    private func handleSvg(_ arguments: [String], markdown: Substring) -> String {
        guard arguments.count == 5 else {
            results.invalid(command: .svg, markdown)
            return ""
        }

        guard let x = Int(arguments[1]),
              let y = Int(arguments[2]),
              let partWidth = Int(arguments[3]),
              let partHeight = Int(arguments[4]) else {
            results.invalid(command: .svg, markdown)
            return ""
        }

        let imageId = arguments[0]

        guard let image = content.image(imageId) else {
            results.missing(file: imageId, markdown: markdown)
            return ""
        }
        guard case .image(let imageType) = image.type,
              imageType == .svg else {
            results.invalid(command: .svg, markdown)
            return ""
        }
        // The generated element references the image file,
        // so register it like all other embedded files
        results.files.insert(image)

        return PartialSvgImage(
            imagePath: image.absoluteUrl,
            altText: image.getDescription(for: language),
            x: x,
            y: y,
            width: partWidth,
            height: partHeight)
        .content
    }
}

/*
    private func handleGif(file: String, altText: String) -> String {
        let imagePath = page.pathRelativeToRootForContainedInputFile(file)
        results.require(file: imagePath, source: page.path)

        guard let size = results.getImageSize(atPath: imagePath, source: page.path) else {
            return ""
        }
        let width = Int(size.width)
        let height = Int(size.height)
        return factory.html.image(file: file, width: width, height: height, altText: altText)
    }
*/