Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/page-loaders.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import {
getSeriesUrl,
getTagUrl,
getSearchUrlFromTagFilters,
getTagWorksFeedAtomUrl,
getTagWorksFeedUrl,
getUserProfileUrl,
Expand All @@ -11,7 +12,7 @@ import {
import { CheerioAPI } from "cheerio";
import { load } from "cheerio/slim";
import { getFetcher } from "./fetcher";
import { ArchiveId } from "types/entities";
import { ArchiveId, TagSearchFilters } from "types/entities";

// This is a wrapper around the fetch function that loads the page into a CheerioAPI
// instance and returns the type of the page.
Expand Down Expand Up @@ -61,6 +62,19 @@ export const loadTagPage = async ({ tagName }: { tagName: string }) => {
});
};

// Cheerio handle for a tag search results page (the page type requested by
// loadTagSearchPage). The literal `kind` member names the page type;
// presumably it keeps this page from being confused with the other
// CheerioAPI-based page types in this file (TODO confirm).
export interface TagSearchPage extends CheerioAPI {
kind: "TagSearchPage";
}
// Loads the tag search results page matching the given filters.
// The filters are first turned into a search URL, which is then fetched
// through fetchPage and returned typed as a TagSearchPage.
export const loadTagSearchPage = async ({
  tagSearchFilters,
}: {
  tagSearchFilters: TagSearchFilters;
}) => {
  const url = getSearchUrlFromTagFilters(tagSearchFilters);
  const searchPage = await fetchPage<TagSearchPage>({ url });
  return searchPage;
};

// Atom feed of the most recent works featuring a tag.
// Sample: https://archiveofourown.org/tags/91247110/feed.atom
export interface TagWorksAtomFeed extends CheerioAPI {
Expand Down
42 changes: 41 additions & 1 deletion src/tags/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,49 @@ import { getTagId, getTagNameFromFeed } from "./works-feed-getters";
import {
loadTagFeedAtomPage,
loadTagPage,
loadTagSearchPage,
loadTagWorksFeed,
} from "src/page-loaders";

import type { Tag } from "types/entities";
import type {
Tag,
TagSearchFilters,
TagSearchResultSummary,
} from "types/entities";
import {
getPagesCount,
getTagsSearchResults,
getTotalResults,
} from "./search-getters";

// Runs a tag search and summarizes the results page.
//
// Any filter the caller leaves out (or sets to null/undefined) is replaced
// with its default before the search page is loaded. The returned summary
// echoes the normalized filters, so callers can see exactly which values
// were used for the request.
export const searchTags = async (
  tagSearchFilters: Partial<TagSearchFilters>
): Promise<TagSearchResultSummary> => {
  const {
    tagName,
    fandoms,
    type,
    wranglingStatus,
    sortColumn,
    sortDirection,
    page,
  } = tagSearchFilters;

  // Fill in every missing filter so the object satisfies TagSearchFilters.
  const filters: TagSearchFilters = {
    tagName: tagName ?? null,
    fandoms: fandoms ?? [],
    type: type ?? "any",
    wranglingStatus: wranglingStatus ?? "any",
    sortColumn: sortColumn ?? "name",
    sortDirection: sortDirection ?? "asc",
    page: page ?? 1,
  };

  const resultsPage = await loadTagSearchPage({ tagSearchFilters: filters });

  // Pull each piece of the summary out of the loaded page.
  const totalResults = getTotalResults(resultsPage);
  const totalPages = getPagesCount(resultsPage);
  const tags = getTagsSearchResults(resultsPage);

  return {
    filters,
    totalResults,
    pages: {
      total: totalPages,
      current: filters.page,
    },
    tags,
  };
};

export const getTag = async ({
tagName,
Expand Down Expand Up @@ -43,6 +82,7 @@ export const getTag = async ({
};
};

// TODO: this is really getCanonicalTagNameById
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this variable name be changed now?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is theoretically a breaking change, and it might end up touching a lot of files (hence the TODO, since I don't want to forget about it).

// Resolves a tag's name from its id by loading the tag's Atom feed page and
// reading the name out of that feed.
export const getTagNameById = async ({ tagId }: { tagId: string }) => {
  const feedPage = await loadTagFeedAtomPage({ tagId });
  return getTagNameFromFeed(feedPage);
};
67 changes: 67 additions & 0 deletions src/tags/search-getters.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { TagSearchPage } from "src/page-loaders";
import { TagSearchResultSummary } from "types/entities";

// Parses the run of decimal digits at the start of `text` (ignoring
// surrounding whitespace) as a base-10 integer. Anything after the leading
// digits is ignored. Throws an Error when the trimmed text does not begin
// with a digit.
const parseIntOrThrow = (text: string) => {
  const leadingDigits = /^(\d+)/.exec(text.trim());
  if (leadingDigits === null) {
    throw new Error(`Invalid integer: ${text}`);
  }
  // The capture group only ever contains digits, so no further cleanup is needed.
  return parseInt(leadingDigits[1], 10);
};

// Reads the total number of search results from the first "h3.heading" on
// the page, which is expected to contain text like "<number> Found".
// Returns 0 when the heading has no such text.
export const getTotalResults = (page: TagSearchPage) => {
  const headingText = page("h3.heading").first().text();
  const foundCount = /(\d+)\s+Found/.exec(headingText);
  if (foundCount === null) {
    return 0;
  }
  return parseIntOrThrow(foundCount[1]);
};

// Reads the number of result pages from the pagination controls: the text of
// the last pagination item that is neither the "next" nor the "previous"
// control is parsed as the page count. Returns 0 when no such item (or no
// text) is found, e.g. when the page has no pagination controls.
export const getPagesCount = (page: TagSearchPage) => {
  const lastPageLabel = page(".pagination.actions li:not(.next, .previous)")
    .last()
    .text();
  if (!lastPageLabel) {
    return 0;
  }
  return parseIntOrThrow(lastPageLabel);
};

// Extracts the tags listed on a tag search results page.
//
// Each result is a list item under "ol.tag.index.group" whose text has the
// format "Type: Name (Works Count)". For every item that contains a tag link
// this returns the tag name, its type, whether it is marked canonical, and
// its works count. Items without a tag link are skipped.
//
// Throws an Error when an item that has a tag link does not match the
// expected format (missing works count or missing type prefix).
export const getTagsSearchResults = (page: TagSearchPage) => {
  return page("ol.tag.index.group > li")
    .map((_, li) => {
      const $li = page(li);
      const link = $li.find("a.tag").first();
      if (!link.length) {
        return null;
      }

      const name = link.text().trim();

      // The full item text is needed for both the works count and the type,
      // so read it once.
      const itemText = $li.text();

      // Tags are in the format: "Type: Name (Works Count)"
      // Here we extract the works count.
      const worksMatch = itemText.match(/\((\d+)\)\s*$/);
      if (!worksMatch) {
        // Fail with a descriptive error rather than dereferencing a null match.
        throw new Error(`Invalid tag works count: ${itemText}`);
      }
      const worksCount = parseIntOrThrow(worksMatch[1]);

      // Tags are in the format: "Type: Name (Works Count)"
      // Here we extract the type.
      const typeMatch = itemText.match(/^([^:]+):/);
      if (!typeMatch) {
        throw new Error(`Invalid tag type: ${itemText}`);
      }
      const type = typeMatch[1].trim().toLowerCase();

      // The classes of the first span in the item tell us whether the tag
      // is canonical.
      const classes = new Set(
        ($li.find("span").attr("class") ?? "").split(/\s+/).filter(Boolean)
      );

      return {
        name,
        // The page labels unsorted tags "UnsortedTag"; we expose "unsorted".
        type:
          type === "unsortedtag"
            ? "unsorted"
            : (type as TagSearchResultSummary["tags"][number]["type"]),
        canonical: classes.has("canonical"),
        worksCount,
      } as const;
    })
    .get()
    .filter((tag) => tag !== null);
};
45 changes: 44 additions & 1 deletion src/urls.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import {
isValidArchiveIdOrNullish,
parseArchiveId,
} from "./utils";
import { WorkSummary } from "types/entities";
import { TagSearchFilters, WorkSummary } from "types/entities";

declare global {
var archiveBaseUrl: string;
Expand Down Expand Up @@ -195,3 +195,46 @@ export const getWorkDetailsFromUrl = ({
collectionName: url.match(/collections\/(\w+)/)?.[1],
};
};

// Converts tag search filters into the query parameters of the tag search URL.
//
// The resulting parameters are, in order: an optional "page" (only when a
// truthy page is given), the fixed "commit=Search Tags", and then one
// "tag_search[<key>]" entry per search field. Missing filters fall back to
// an empty string, or to "any" / "name" / "asc" for wrangling status, sort
// column and sort direction respectively.
const getSearchParamsFromTagFilters = (
  searchFilters: Partial<TagSearchFilters>
) => {
  const {
    tagName,
    fandoms,
    type,
    wranglingStatus,
    sortColumn,
    sortDirection,
    page,
  } = searchFilters;

  // Our wrangling status values contain "_or_" / "_and_" for readability;
  // the search endpoint expects them without those connectors.
  const wranglingStatusParam =
    wranglingStatus?.replace(/_or_/g, "_").replace(/_and_/g, "_") ?? "any";

  // The search endpoint calls the works count column "uses".
  const sortColumnParam =
    sortColumn === "works_count" ? "uses" : sortColumn ?? "name";

  // Listed as plain key/value pairs first for readability; each key is
  // wrapped with "tag_search[]" when added to the URL parameters below.
  const tagSearchFields: [string, string][] = [
    ["name", tagName ?? ""],
    ["fandoms", fandoms?.join(",") ?? ""],
    ["type", type?.toLowerCase() ?? ""],
    ["wrangling_status", wranglingStatusParam],
    ["sort_column", sortColumnParam],
    ["sort_direction", sortDirection ?? "asc"],
  ];

  const searchParams = new URLSearchParams();
  if (page) {
    searchParams.set("page", String(page));
  }
  searchParams.set("commit", "Search Tags");

  tagSearchFields.forEach(([key, value]) => {
    searchParams.set(`tag_search[${key}]`, value);
  });

  return searchParams;
};

// Builds the absolute URL of the tag search page for the given filters:
// "tags/search" is resolved against the archive base URL, and the query
// string is the one produced by getSearchParamsFromTagFilters.
export const getSearchUrlFromTagFilters = (searchFilters: TagSearchFilters) => {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this be getTagSearchUrlFromTagFilters since there are multiple types of searches?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've gone back and forth on this.... I feel like getTagSearchUrlFromTagFilters has Tag twice and is way too much of a mouthful, but maybe getTagSearchUrlFromFilters is better? (so, making it TagSearch rather than TagFilters)

const url = new URL(`tags/search`, getArchiveBaseUrl());
url.search = getSearchParamsFromTagFilters(searchFilters).toString();
return url.href;
};
Loading