diff --git a/Sources/ContributeWordPress/Extensions/String.swift b/Sources/ContributeWordPress/Extensions/String.swift new file mode 100644 index 00000000..fbf28757 --- /dev/null +++ b/Sources/ContributeWordPress/Extensions/String.swift @@ -0,0 +1,2 @@ +import Foundation + diff --git a/Sources/ContributeWordPress/Images/AssetDownloader.swift b/Sources/ContributeWordPress/Images/AssetDownloader.swift index 22c20ffc..1d400f39 100644 --- a/Sources/ContributeWordPress/Images/AssetDownloader.swift +++ b/Sources/ContributeWordPress/Images/AssetDownloader.swift @@ -9,45 +9,17 @@ import SyndiKit /// A type that downloads assets required by WordPress posts. public struct AssetDownloader: Downloader { - private let downloadPathFromURL: (URL) -> String - private let downloadURLFromURL: (URL) -> URL? private let urlDownloader: URLDownloader - public init( - downloadPathFromURL: ((URL) -> String)? = nil, - downloadURLFromURL: ((URL) -> URL?)? = nil, - urlDownloader: URLDownloader = FileURLDownloader() - ) { - self.downloadPathFromURL = downloadPathFromURL ?? Self.defaultDownloadPath(fromURL:) - self.downloadURLFromURL = downloadURLFromURL ?? Self.defaultDownloadURL(fromURL:) - self.urlDownloader = urlDownloader - } - - internal static func defaultDownloadPath(fromURL url: URL) -> String { - let directoryPrefix = url.host?.components(separatedBy: ".").first ?? "default" - return ([directoryPrefix] + url.pathComponents.suffix(3)).joined(separator: "/") - } - - internal static func defaultDownloadURL(fromURL url: URL) -> URL? { - guard var components = URLComponents(url: url, resolvingAgainstBaseURL: false) else { - return nil - } - components.query = nil - return components.url - } - - /// Downloads assets from WordPress posts. + /// Downloads assets. /// /// - Parameters: - /// - assets: The array of imported assets to be downloaded. - /// - resourceImagePath: The directory path where the downloaded assets will be saved. + /// - assets: The imported assets to be downloaded. /// - dryRun: To perform a dry run without actually downloading the assets. /// - allowsOverwrites: To allow overwriting existing assets. /// - Throws: An `ImportError.assetDownloads` error if there are any errors during - /// the download process. public func download( assets: [WordPressAssetImport], - to resourceImagePath: URL, dryRun: Bool, allowsOverwrites: Bool ) throws { @@ -62,18 +34,13 @@ public struct AssetDownloader: Downloader { for asset in assets { group.enter() - let newPath = downloadPathFromURL(asset.oldURL) - let fromURL = downloadURLFromURL(asset.oldURL) ?? asset.oldURL - - let destinationURL = resourceImagePath.appendingPathComponent(newPath) - urlDownloader.download( - from: fromURL, - to: destinationURL, + from: asset.downloadFromURL, + to: asset.downloadAtURL, allowOverwrite: allowsOverwrites ) { error in if let error = error { - errors[asset.oldURL] = error + errors[asset.downloadFromURL] = error } group.leave() } diff --git a/Sources/ContributeWordPress/Images/Downloader.swift b/Sources/ContributeWordPress/Images/Downloader.swift index 0d81a7a0..065e3003 100644 --- a/Sources/ContributeWordPress/Images/Downloader.swift +++ b/Sources/ContributeWordPress/Images/Downloader.swift @@ -3,9 +3,15 @@ import SyndiKit /// A protocol for downloading assets from WordPress posts. public protocol Downloader { +// func download( +// assets: [WordPressAssetImport], +// to resourceAssetPath: URL, +// dryRun: Bool, +// allowsOverwrites: Bool +// ) throws + func download( assets: [WordPressAssetImport], - to resourceImagePath: URL, dryRun: Bool, allowsOverwrites: Bool ) throws diff --git a/Sources/ContributeWordPress/Images/WordPressAssetImport.swift b/Sources/ContributeWordPress/Images/WordPressAssetImport.swift index 9fba5e02..76f7c8a8 100644 --- a/Sources/ContributeWordPress/Images/WordPressAssetImport.swift +++ b/Sources/ContributeWordPress/Images/WordPressAssetImport.swift @@ -8,13 +8,16 @@ import SyndiKit /// A type that holds information about an asset imported from a `WordPressPost`. public struct WordPressAssetImport: Hashable { /// The original URL of the asset. - public let oldURL: URL + public let downloadFromURL: URL - /// The id of `WordPressPost` to which the asset belongs. - public let parentID: Int? + // TODO: do it once approved + public let downloadAtURL: URL + + // TODO: do it once approved + public let featuredPath: String - /// The new path where the asset will be saved. - public let newPath: String + /// The id of `WordPressPost` to which the asset belongs. + public let parentID: Int /// Initializes a new `WordPressAssetImport` instance. /// @@ -22,30 +25,43 @@ public struct WordPressAssetImport: Hashable { /// - oldURL: The original URL of the asset. /// - parentID: The id of `WordPressPost` to which the asset belongs. /// - newPath: The new path where the asset will be saved. - internal init(oldURL: URL, parentID: Int?, newPath: String) { - self.oldURL = oldURL + internal init(oldURL: URL, newURL: URL, featuredPath: String, parentID: Int) { + self.downloadFromURL = oldURL + self.downloadAtURL = newURL + self.featuredPath = featuredPath self.parentID = parentID - self.newPath = newPath + + print() + print("sourceURL: \(self.downloadFromURL.absoluteString)") + print("destinationURL: \(self.downloadAtURL.absoluteString)") + print("featuredPath: \(self.featuredPath)") + print() } public init?( forPost post: WordPressPost, - oldUrl: String, + sourceURL: URL, assetRoot: String, - assetSiteURL: URL + resourcePathURL: URL, + importPathURL: URL? ) { - guard let oldURL = URL(string: oldUrl) else { - return nil - } + let directoryPrefix = sourceURL.host?.components(separatedBy: ".").first ?? "default" - #warning("SHENDY: Why is it using `default`? There are insances of multiple sites using a multi site in wp. That's what BrightDigit was.") - self.init( - oldURL: oldURL, - parentID: post.ID, - newPath: oldUrl.replacingOccurrences( - of: "\(assetSiteURL)/wp-content/uploads", - with: "/\(assetRoot)/default" + let featuredPath = sourceURL.path + .replacingOccurrences( + of: "/wp-content/uploads", + with: assetRoot ) + .replacingOccurrences(of: "//", with: "/") + + + let destinationURL = resourcePathURL.appendingPathComponent(featuredPath) + + self.init( + oldURL: importPathURL?.appendingPathComponent(sourceURL.path) ?? sourceURL, + newURL: destinationURL, + featuredPath: featuredPath, + parentID: post.ID ) } } diff --git a/Sources/ContributeWordPress/WordPressMarkdownProcessor.swift b/Sources/ContributeWordPress/WordPressMarkdownProcessor.swift index ee586700..b1c55600 100644 --- a/Sources/ContributeWordPress/WordPressMarkdownProcessor.swift +++ b/Sources/ContributeWordPress/WordPressMarkdownProcessor.swift @@ -37,7 +37,7 @@ public struct WordPressMarkdownProcessor< ) { self.exportDecoder = exportDecoder self.redirectWriter = redirectWriter - assetDownloader = assetDownloader + self.assetDownloader = assetDownloader self.destinationURLGenerator = destinationURLGenerator self.contentBuilder = contentBuilder self.postFilters = postFilters @@ -68,9 +68,7 @@ public struct WordPressMarkdownProcessor< // From the list of images attached to this post, // choose the first one as featuredImage let featuredImagePath = assets.first { $0.parentID == post.ID }.map { - ["", assetRoot, $0.newPath] - .joined(separator: "/") - .replacingOccurrences(of: "//", with: "/") + $0.featuredPath } _ = try self.contentBuilder.write( @@ -88,6 +86,10 @@ public struct WordPressMarkdownProcessor< } } + // TODO: remove this once finished + // importAssetURLPath /Users/shendy/Desktop/Projects/Leo/gitlab/leogdion.name/Wordpress/html/ + // oldUrl https://leogdion.name/wp-content/uploads/2018/01/diagram-for-goals-e1535132251116.png + /// Begins the processing of the WordPress posts. /// /// - Parameter settings: The required settings for processing WordPress exports. @@ -96,7 +98,7 @@ public struct WordPressMarkdownProcessor< withSettings settings: WordPressMarkdownProcessorSettings ) throws { // 1. Decodes WordPress posts from exports directory. - let allPosts = try exportDecoder.posts(fromExportsAt: settings.directoryURL) + let allPosts = try exportDecoder.posts(fromExportsAt: settings.exportsDirectoryURL) // 2. Writes redirects for all decoded WordPress posts. try redirectWriter.writeRedirects( @@ -111,75 +113,57 @@ public struct WordPressMarkdownProcessor< from: settings.resourcesPathURL ) ?? settings.resourcesPathURL.path + var htmlFromPost: ((WordPressPost) -> String)? = nil + // 4. Build asset imports from all posts let assetsImports: [WordPressAssetImport] = { guard let urlPathRegex = try? NSRegularExpression( - pattern: "\(settings.assetsSiteURL)/wp-content/uploads([^\"]+)" + pattern: "\(settings.assetSiteURL)/wp-content/uploads([^\"]+)" ) else { fatalError("Unable to create the regex expression") } - // swiftlint:disable:next line_length - #warning("I think oldURL should use the `importImagePathURL` if it's there, rather then change the `downloader` ") return allPosts .flatMap(\.value) .filter { $0.type == "post" } .map { post in - urlPathRegex - .matches( - in: post.body, - range: NSRange(post.body.startIndex..., in: post.body) - ) - .compactMap { match -> String? in - guard let range = Range(match.range, in: post.body) else { - return nil + post.body + .matchesUrls(regex: urlPathRegex) + .compactMap { (match: String) -> WordPressAssetImport? in + guard let sourceURL = URL(string: match) else { return nil } + + let directoryPrefix = sourceURL.host?.components(separatedBy: ".").first ?? "default" + // TODO: Rename this properly + let assetRoot = ["", assetRoot, directoryPrefix].joined(separator: "/") + + // TODO: Still thinking about this. + htmlFromPost = { post in + post.body.replacingOccurrences( + of: "\(settings.assetSiteURL)/wp-content/uploads", + with: assetRoot + ) } - return String(post.body[range]) - } - .compactMap { - WordPressAssetImport( + print(assetRoot) + + return WordPressAssetImport( forPost: post, - oldUrl: String($0), + sourceURL: sourceURL, assetRoot: assetRoot, - assetSiteURL: settings.assetsSiteURL + resourcePathURL: settings.resourcesPathURL, + importPathURL: settings.importAssetPathURL ) } } .flatMap { $0 } }() - // 5. Download all assets (images, pdfs, etc) - // swiftlint:disable:next line_length - #warning("Why is it using `default`? There are insances of multiple sites using a multi site in wp. That's what BrightDigit was.") -// if let importImagePathURL = settings.importAssetPathURL { -// assetDownloader = AssetDownloader( -// downloadPathFromURL: { url in -// (["default"] + url.pathComponents.suffix(3)).joined(separator: "/") -// }, -// downloadURLFromURL: { url in -// importImagePathURL.appendingPathComponent(url.path) -// } -// ) -// } - try assetDownloader.download( assets: assetsImports, - to: settings.resourceAssetPathURL, dryRun: settings.skipDownload, allowsOverwrites: settings.overwriteAssets ) - // 6. To modify asset urls with local path instead - // swiftlint:disable:next line_length - #warning("Why is it using `default`? There are insances of multiple sites using a multi site in wp. That's what BrightDigit was.") - let htmlFromPost: ((WordPressPost) -> String) = { post in - post.body.replacingOccurrences( - of: "\(settings.assetsSiteURL)/wp-content/uploads", - with: "/\(assetRoot)/default" - ) - } - // 7. Starts writing the markdown files for all WordPress post, try writeAllPosts( allPosts, @@ -232,3 +216,20 @@ extension WordPressMarkdownProcessor { ) } } + +extension String { + internal func matchesUrls(regex: NSRegularExpression) -> [String] { + regex + .matches( + in: self, + range: NSRange(self.startIndex..., in: self) + ) + .compactMap { match -> String? in + guard let range = Range(match.range, in: self) else { + return nil + } + + return String(self[range]) + } + } +} diff --git a/Sources/ContributeWordPress/WordPressMarkdownProcessorSettings.swift b/Sources/ContributeWordPress/WordPressMarkdownProcessorSettings.swift index 82bed269..b49a43c0 100644 --- a/Sources/ContributeWordPress/WordPressMarkdownProcessorSettings.swift +++ b/Sources/ContributeWordPress/WordPressMarkdownProcessorSettings.swift @@ -4,33 +4,33 @@ import Foundation public protocol WordPressMarkdownProcessorSettings { /// The URL for the content path /// - /// Example: Content + /// Example: /..../Content/ var contentPathURL: URL { get } /// The URL for the resources path /// - /// Example: Resources + /// Example: /..../Resources/ var resourcesPathURL: URL { get } /// The URL for the directory /// - /// Example: Import/WordPress - var directoryURL: URL { get } + /// Example: /..../WordPress/exports/ + var exportsDirectoryURL: URL { get } /// The URL of the directory that the resource assets will be written to. /// - /// Example: Resources/media/wp-assets + /// Example: /..../Resources/media/wp-assets/ var resourceAssetPathURL: URL { get } /// The URL of the directory that the posts assets should be imported. /// - /// Example: WordPress/html/ + /// Example: /..../WordPress/html/ var importAssetPathURL: URL? { get } /// The URL from which assets will be downloaded /// /// Example: https://websitename.com - var assetsSiteURL: URL { get } + var assetSiteURL: URL { get } /// Whether to overwrite existing assets. var overwriteAssets: Bool { get }