Skip to content

Commit

Permalink
update: refactor begin method with any required components
Browse files Browse the repository at this point in the history
  • Loading branch information
devahmedshendy committed Jul 28, 2023
1 parent 8b006c5 commit 3c4c608
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 113 deletions.
2 changes: 2 additions & 0 deletions Sources/ContributeWordPress/Extensions/String.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Foundation

43 changes: 5 additions & 38 deletions Sources/ContributeWordPress/Images/AssetDownloader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,45 +9,17 @@ import SyndiKit

/// A type that downloads assets required by WordPress posts.
public struct AssetDownloader: Downloader {
private let downloadPathFromURL: (URL) -> String
private let downloadURLFromURL: (URL) -> URL?
private let urlDownloader: URLDownloader

public init(
downloadPathFromURL: ((URL) -> String)? = nil,
downloadURLFromURL: ((URL) -> URL?)? = nil,
urlDownloader: URLDownloader = FileURLDownloader()
) {
self.downloadPathFromURL = downloadPathFromURL ?? Self.defaultDownloadPath(fromURL:)
self.downloadURLFromURL = downloadURLFromURL ?? Self.defaultDownloadURL(fromURL:)
self.urlDownloader = urlDownloader
}

internal static func defaultDownloadPath(fromURL url: URL) -> String {
let directoryPrefix = url.host?.components(separatedBy: ".").first ?? "default"
return ([directoryPrefix] + url.pathComponents.suffix(3)).joined(separator: "/")
}

internal static func defaultDownloadURL(fromURL url: URL) -> URL? {
guard var components = URLComponents(url: url, resolvingAgainstBaseURL: false) else {
return nil
}
components.query = nil
return components.url
}

/// Downloads assets from WordPress posts.
/// Downloads assets.
///
/// - Parameters:
/// - assets: The array of imported assets to be downloaded.
/// - resourceImagePath: The directory path where the downloaded assets will be saved.
/// - assets: The imported assets to be downloaded.
/// - dryRun: To perform a dry run without actually downloading the assets.
/// - allowsOverwrites: To allow overwriting existing assets.
/// - Throws: An `ImportError.assetDownloads` error if there are any errors during
/// the download process.
public func download(
assets: [WordPressAssetImport],
to resourceImagePath: URL,
dryRun: Bool,
allowsOverwrites: Bool
) throws {
Expand All @@ -62,18 +34,13 @@ public struct AssetDownloader: Downloader {
for asset in assets {
group.enter()

let newPath = downloadPathFromURL(asset.oldURL)
let fromURL = downloadURLFromURL(asset.oldURL) ?? asset.oldURL

let destinationURL = resourceImagePath.appendingPathComponent(newPath)

urlDownloader.download(
from: fromURL,
to: destinationURL,
from: asset.downloadFromURL,
to: asset.downloadAtURL,
allowOverwrite: allowsOverwrites
) { error in
if let error = error {
errors[asset.oldURL] = error
errors[asset.downloadFromURL] = error
}
group.leave()
}
Expand Down
8 changes: 7 additions & 1 deletion Sources/ContributeWordPress/Images/Downloader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,15 @@ import SyndiKit

/// A protocol for downloading assets from WordPress posts.
public protocol Downloader {
// func download(
// assets: [WordPressAssetImport],
// to resourceAssetPath: URL,
// dryRun: Bool,
// allowsOverwrites: Bool
// ) throws

func download(
assets: [WordPressAssetImport],
to resourceImagePath: URL,
dryRun: Bool,
allowsOverwrites: Bool
) throws
Expand Down
56 changes: 36 additions & 20 deletions Sources/ContributeWordPress/Images/WordPressAssetImport.swift
Original file line number Diff line number Diff line change
Expand Up @@ -8,44 +8,60 @@ import SyndiKit
/// A type that holds information about an asset imported from a `WordPressPost`.
public struct WordPressAssetImport: Hashable {
/// The original URL of the asset.
public let oldURL: URL
public let downloadFromURL: URL

/// The id of `WordPressPost` to which the asset belongs.
public let parentID: Int?
// TODO: do it once approved
public let downloadAtURL: URL

// TODO: do it once approved
public let featuredPath: String

/// The new path where the asset will be saved.
public let newPath: String
/// The id of `WordPressPost` to which the asset belongs.
public let parentID: Int

/// Initializes a new `WordPressAssetImport` instance.
///
/// - Parameters:
/// - oldURL: The original URL of the asset.
/// - parentID: The id of `WordPressPost` to which the asset belongs.
/// - newPath: The new path where the asset will be saved.
internal init(oldURL: URL, parentID: Int?, newPath: String) {
self.oldURL = oldURL
internal init(oldURL: URL, newURL: URL, featuredPath: String, parentID: Int) {
self.downloadFromURL = oldURL
self.downloadAtURL = newURL
self.featuredPath = featuredPath
self.parentID = parentID
self.newPath = newPath

print()
print("sourceURL: \(self.downloadFromURL.absoluteString)")
print("destinationURL: \(self.downloadAtURL.absoluteString)")
print("featuredPath: \(self.featuredPath)")
print()
}

public init?(
forPost post: WordPressPost,
oldUrl: String,
sourceURL: URL,
assetRoot: String,
assetSiteURL: URL
resourcePathURL: URL,
importPathURL: URL?
) {
guard let oldURL = URL(string: oldUrl) else {
return nil
}
let directoryPrefix = sourceURL.host?.components(separatedBy: ".").first ?? "default"

#warning("SHENDY: Why is it using `default`? There are insances of multiple sites using a multi site in wp. That's what BrightDigit was.")
self.init(
oldURL: oldURL,
parentID: post.ID,
newPath: oldUrl.replacingOccurrences(
of: "\(assetSiteURL)/wp-content/uploads",
with: "/\(assetRoot)/default"
let featuredPath = sourceURL.path
.replacingOccurrences(
of: "/wp-content/uploads",
with: assetRoot
)
.replacingOccurrences(of: "//", with: "/")


let destinationURL = resourcePathURL.appendingPathComponent(featuredPath)

self.init(
oldURL: importPathURL?.appendingPathComponent(sourceURL.path) ?? sourceURL,
newURL: destinationURL,
featuredPath: featuredPath,
parentID: post.ID
)
}
}
95 changes: 48 additions & 47 deletions Sources/ContributeWordPress/WordPressMarkdownProcessor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public struct WordPressMarkdownProcessor<
) {
self.exportDecoder = exportDecoder
self.redirectWriter = redirectWriter
assetDownloader = assetDownloader
self.assetDownloader = assetDownloader
self.destinationURLGenerator = destinationURLGenerator
self.contentBuilder = contentBuilder
self.postFilters = postFilters
Expand Down Expand Up @@ -68,9 +68,7 @@ public struct WordPressMarkdownProcessor<
// From the list of images attached to this post,
// choose the first one as featuredImage
let featuredImagePath = assets.first { $0.parentID == post.ID }.map {
["", assetRoot, $0.newPath]
.joined(separator: "/")
.replacingOccurrences(of: "//", with: "/")
$0.featuredPath
}

_ = try self.contentBuilder.write(
Expand All @@ -88,6 +86,10 @@ public struct WordPressMarkdownProcessor<
}
}

// TODO: remove this once finished
// importAssetURLPath /Users/shendy/Desktop/Projects/Leo/gitlab/leogdion.name/Wordpress/html/
// oldUrl https://leogdion.name/wp-content/uploads/2018/01/diagram-for-goals-e1535132251116.png

/// Begins the processing of the WordPress posts.
///
/// - Parameter settings: The required settings for processing WordPress exports.
Expand All @@ -96,7 +98,7 @@ public struct WordPressMarkdownProcessor<
withSettings settings: WordPressMarkdownProcessorSettings
) throws {
// 1. Decodes WordPress posts from exports directory.
let allPosts = try exportDecoder.posts(fromExportsAt: settings.directoryURL)
let allPosts = try exportDecoder.posts(fromExportsAt: settings.exportsDirectoryURL)

// 2. Writes redirects for all decoded WordPress posts.
try redirectWriter.writeRedirects(
Expand All @@ -111,75 +113,57 @@ public struct WordPressMarkdownProcessor<
from: settings.resourcesPathURL
) ?? settings.resourcesPathURL.path

var htmlFromPost: ((WordPressPost) -> String)? = nil

// 4. Build asset imports from all posts
let assetsImports: [WordPressAssetImport] = {
guard let urlPathRegex = try? NSRegularExpression(
pattern: "\(settings.assetsSiteURL)/wp-content/uploads([^\"]+)"
pattern: "\(settings.assetSiteURL)/wp-content/uploads([^\"]+)"
) else {
fatalError("Unable to create the regex expression")
}

// swiftlint:disable:next line_length
#warning("I think oldURL should use the `importImagePathURL` if it's there, rather then change the `downloader` ")
return allPosts
.flatMap(\.value)
.filter { $0.type == "post" }
.map { post in
urlPathRegex
.matches(
in: post.body,
range: NSRange(post.body.startIndex..., in: post.body)
)
.compactMap { match -> String? in
guard let range = Range(match.range, in: post.body) else {
return nil
post.body
.matchesUrls(regex: urlPathRegex)
.compactMap { (match: String) -> WordPressAssetImport? in
guard let sourceURL = URL(string: match) else { return nil }

let directoryPrefix = sourceURL.host?.components(separatedBy: ".").first ?? "default"
// TODO: Rename this properly
let assetRoot = ["", assetRoot, directoryPrefix].joined(separator: "/")

// TODO: Still thinking about this.
htmlFromPost = { post in
post.body.replacingOccurrences(
of: "\(settings.assetSiteURL)/wp-content/uploads",
with: assetRoot
)
}

return String(post.body[range])
}
.compactMap {
WordPressAssetImport(
print(assetRoot)

return WordPressAssetImport(
forPost: post,
oldUrl: String($0),
sourceURL: sourceURL,
assetRoot: assetRoot,
assetSiteURL: settings.assetsSiteURL
resourcePathURL: settings.resourcesPathURL,
importPathURL: settings.importAssetPathURL
)
}
}
.flatMap { $0 }
}()

// 5. Download all assets (images, pdfs, etc)
// swiftlint:disable:next line_length
#warning("Why is it using `default`? There are insances of multiple sites using a multi site in wp. That's what BrightDigit was.")
// if let importImagePathURL = settings.importAssetPathURL {
// assetDownloader = AssetDownloader(
// downloadPathFromURL: { url in
// (["default"] + url.pathComponents.suffix(3)).joined(separator: "/")
// },
// downloadURLFromURL: { url in
// importImagePathURL.appendingPathComponent(url.path)
// }
// )
// }

try assetDownloader.download(
assets: assetsImports,
to: settings.resourceAssetPathURL,
dryRun: settings.skipDownload,
allowsOverwrites: settings.overwriteAssets
)

// 6. To modify asset urls with local path instead
// swiftlint:disable:next line_length
#warning("Why is it using `default`? There are insances of multiple sites using a multi site in wp. That's what BrightDigit was.")
let htmlFromPost: ((WordPressPost) -> String) = { post in
post.body.replacingOccurrences(
of: "\(settings.assetsSiteURL)/wp-content/uploads",
with: "/\(assetRoot)/default"
)
}

// 7. Starts writing the markdown files for all WordPress posts.
try writeAllPosts(
allPosts,
Expand Down Expand Up @@ -232,3 +216,20 @@ extension WordPressMarkdownProcessor {
)
}
}

extension String {
  /// Collects the text of every match that `regex` finds in the receiver.
  ///
  /// - Parameter regex: The regular expression evaluated over the whole string.
  /// - Returns: The matched substrings, in the order the regex reports them.
  ///   Matches whose range cannot be converted to a `String` range are skipped.
  internal func matchesUrls(regex: NSRegularExpression) -> [String] {
    let wholeRange = NSRange(self.startIndex..., in: self)
    var found: [String] = []

    for result in regex.matches(in: self, range: wholeRange) {
      // Convert the regex's `NSRange` back to `String` indices before slicing.
      if let swiftRange = Range(result.range, in: self) {
        found.append(String(self[swiftRange]))
      }
    }

    return found
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,33 @@ import Foundation
public protocol WordPressMarkdownProcessorSettings {
/// The URL for the content path
///
/// Example: Content
/// Example: /..../Content/
var contentPathURL: URL { get }

/// The URL for the resources path
///
/// Example: Resources
/// Example: /..../Resources/
var resourcesPathURL: URL { get }

/// The URL for the directory containing the WordPress exports
///
/// Example: Import/WordPress
var directoryURL: URL { get }
/// Example: /..../WordPress/exports/
var exportsDirectoryURL: URL { get }

/// The URL of the directory that the resource assets will be written to.
///
/// Example: Resources/media/wp-assets
/// Example: /..../Resources/media/wp-assets/
var resourceAssetPathURL: URL { get }

/// The URL of the directory from which the posts' assets should be imported.
///
/// Example: WordPress/html/
/// Example: /..../WordPress/html/
var importAssetPathURL: URL? { get }

/// The URL from which assets will be downloaded
///
/// Example: https://websitename.com
var assetsSiteURL: URL { get }
var assetSiteURL: URL { get }

/// Whether to overwrite existing assets.
var overwriteAssets: Bool { get }
Expand Down

0 comments on commit 3c4c608

Please sign in to comment.