Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

+1 validation text #262

Merged
merged 4 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions ffun/ffun/cli/commands/processors_quality.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import pathlib
from collections import Counter

import typer

Expand Down Expand Up @@ -135,3 +136,44 @@ async def run_deff_all(processor_name: str, knowlege_root: pathlib.Path = _root,
@cli_app.command()
def diff_all(processor: str, knowlege_root: pathlib.Path = _root, show_tag_diffs: bool = False) -> None:
    # Synchronous CLI entry point: build the `run_deff_all` coroutine from the
    # command-line arguments and drive it to completion with `asyncio.run`.
    # (Documented with comments on purpose: a docstring on a Typer command
    # would become its `--help` text.)
    job = run_deff_all(
        processor,
        knowlege_root=knowlege_root,
        show_tag_diffs=show_tag_diffs,
    )
    asyncio.run(job)


# Build the "expected tags" record for one entry by running a processor on it
# several times and keeping only the tags that show up consistently.
#
# Parameters:
#   processor        - processor name, forwarded to `single_run` and `save_expected_data`.
#   entry_id         - id of the knowledge-base entry to process.
#   knowlege_root    - root directory handed to `KnowlegeBase`.
#   requests_number  - how many times `single_run` is invoked.
#   min_tags_count   - lowest occurrence count for a tag to be kept as "should have".
#
# NOTE(review): leading indentation is missing in this capture of the diff, so
# which of the statements below are nested under `async with with_app():`
# cannot be confirmed from here — check against the real file.
async def run_prepare_news_item(
processor: str, entry_id: int, knowlege_root: pathlib.Path, requests_number: int, min_tags_count: int
) -> None:
results = []

async with with_app():
kb = KnowlegeBase(knowlege_root)

# One processor run per iteration; every result is kept for the tally below.
for i in range(requests_number):
logger.info("requesting_tags", step=i, steps_number=requests_number)
result = await single_run(processor, entry_id, kb, actual=True)
results.append(result)

logger.info("requests_completed")

# Tally how often each tag appeared across all collected results.
tags: Counter[str] = Counter()

for result in results:
tags.update(result.tags)

# "Must have": count equals the number of runs.
# "Should have": count is at least `min_tags_count` but below the number of runs.
# Tags counted fewer than `min_tags_count` times end up in neither set.
# NOTE(review): presumably `result.tags` holds each tag at most once per run;
# if a run could repeat a tag, its count could exceed `requests_number` and
# the tag would be dropped from both sets — verify against `single_run`.
tags_must_have = {tag for tag, count in tags.items() if count == requests_number}
tags_should_have = {tag for tag, count in tags.items() if min_tags_count <= count < requests_number}

kb.save_expected_data(processor, entry_id, tags_must_have=tags_must_have, tags_should_have=tags_should_have)


@cli_app.command()
def prepere_news_item(
    processor: str, entry: int, knowlege_root: pathlib.Path = _root, requests_number: int = 5, min_tags_count: int = 3
) -> None:
    # Synchronous CLI entry point for `run_prepare_news_item`: the CLI's
    # `entry` argument is passed on as `entry_id`, everything else is
    # forwarded under the same name, and the coroutine is run to completion.
    # (Documented with comments on purpose: a docstring on a Typer command
    # would become its `--help` text.)
    job = run_prepare_news_item(
        processor=processor,
        entry_id=entry,
        knowlege_root=knowlege_root,
        requests_number=requests_number,
        min_tags_count=min_tags_count,
    )
    asyncio.run(job)
14 changes: 14 additions & 0 deletions ffun/ffun/processors_quality/knowlege_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,20 @@ def save_processor_result(self, processor: str, entry_id: int, result: Processor
actual_path = self._dir_tags_actual / processor / f"{id_to_name(entry_id)}.toml"
actual_path.write_text(content)

def save_expected_data(
    self, processor: str, entry_id: int, tags_must_have: set[str], tags_should_have: set[str]
) -> None:
    """Write the expected tag sets for an entry to its TOML file.

    The file is ``<expected dir>/<processor>/<id_to_name(entry_id)>.toml`` and
    holds two keys, ``tags_must_have`` and ``tags_should_have``, each a sorted
    list. The processor directory is created when missing.

    Raises:
        NotImplementedError: if the two tag sets share at least one tag.
    """
    overlap = tags_must_have & tags_should_have

    if overlap:
        raise NotImplementedError("tags_must_have and tags_should_have should not intersect")

    processor_dir = self._dir_tags_expected / processor
    processor_dir.mkdir(parents=True, exist_ok=True)

    target = processor_dir / f"{id_to_name(entry_id)}.toml"

    # Sorting keeps the written file stable regardless of set iteration order.
    payload = {
        "tags_must_have": sorted(tags_must_have),
        "tags_should_have": sorted(tags_should_have),
    }

    target.write_text(toml.dumps(payload))

def copy_last_to_actual(self, processor: str, entry_id: int) -> None:
last_path = self._dir_tags_last / processor / f"{id_to_name(entry_id)}.toml"
actual_path = self._dir_tags_actual / processor / f"{id_to_name(entry_id)}.toml"
Expand Down
24 changes: 24 additions & 0 deletions tags_quality_base/news/0012.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
title = "SpaceX will return stranded astronauts in February 2025, NASA announces"
external_url = "https://en.wikinews.org/wiki/SpaceX_will_return_stranded_astronauts_in_February_2025,_NASA_announces"
external_tags = []
published_at = "2024-08-30T12:00:00+00:00"

[attribution]
title = ": SpaceX will return stranded astronauts in February 2025, NASA announces"
authors = ["Wikinews contributors"]
link = "https://en.wikinews.org/wiki/SpaceX_will_return_stranded_astronauts_in_February_2025,_NASA_announces"
license = "CC BY-NC-ND 4.0"
---
<p>On Saturday, <a href="/wiki/NASA" class="mw-redirect" title="NASA">NASA</a> (National Aeronautics and Space Administration) announced at a press conference that <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Sunita_Williams" class="extiw" title="w:Sunita Williams">Sunita Williams</a></span> and <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Barry_Wilmore" class="extiw" title="w:Barry Wilmore">Barry Wilmore</a></span> would return home from the <a href="/wiki/International_Space_Station" class="mw-redirect" title="International Space Station">International Space Station</a> (ISS) on a <a href="/wiki/SpaceX" class="mw-redirect" title="SpaceX">SpaceX</a> <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Crew_Dragon" class="extiw" title="w:Crew Dragon">Crew Dragon</a></span> spacecraft in February 2025. The <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Boeing_Starliner" class="extiw" title="w:Boeing Starliner">Boeing Starliner</a></span>, the spacecraft which had been expected to transport the crew from ISS earlier in June, would return to Earth "un-crewed" — on autopilot with planned landing in a desert in <a href="/wiki/New_Mexico" class="mw-redirect" title="New Mexico">New Mexico</a>, possibly next month, the NASA announcement said.
</p>
<figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/File:Boeing%27s_Starliner_crew_ship_approaches_the_space_station_(iss067e066735)_(cropped).jpg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/d/d3/Boeing%27s_Starliner_crew_ship_approaches_the_space_station_%28iss067e066735%29_%28cropped%29.jpg/220px-Boeing%27s_Starliner_crew_ship_approaches_the_space_station_%28iss067e066735%29_%28cropped%29.jpg" decoding="async" width="220" height="152" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/d/d3/Boeing%27s_Starliner_crew_ship_approaches_the_space_station_%28iss067e066735%29_%28cropped%29.jpg/330px-Boeing%27s_Starliner_crew_ship_approaches_the_space_station_%28iss067e066735%29_%28cropped%29.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/d/d3/Boeing%27s_Starliner_crew_ship_approaches_the_space_station_%28iss067e066735%29_%28cropped%29.jpg/440px-Boeing%27s_Starliner_crew_ship_approaches_the_space_station_%28iss067e066735%29_%28cropped%29.jpg 2x" data-file-width="3705" data-file-height="2560"></a><figcaption>Boeing's Starliner crew ship approaches the space station. <br><span style="font-size:90%" class="plainlinks">Image: Bob Hines/NASA.</span></figcaption></figure>
<figure class="mw-default-size mw-halign-left" typeof="mw:File/Thumb"><a href="/wiki/File:Iss071e052057.jpg" class="mw-file-description"><img src="//upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Iss071e052057.jpg/220px-Iss071e052057.jpg" decoding="async" width="220" height="147" class="mw-file-element" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Iss071e052057.jpg/330px-Iss071e052057.jpg 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Iss071e052057.jpg/440px-Iss071e052057.jpg 2x" data-file-width="8256" data-file-height="5504"></a><figcaption>Spacex Crew Dragon Endeavour approaching the ISS in May 2024 during <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Crew-8" class="extiw" title="w:Crew-8">Crew-8</a></span>. <br><span style="font-size:90%" class="plainlinks">Image: NASA Johnson Space Center.</span></figcaption></figure>
<p>On its way to the ISS, Starliner has experienced helium leaks and several thruster malfunctions, which, according to National Public Radio (<a href="/wiki/NPR" class="mw-redirect" title="NPR">NPR</a>, "inexplicably conked out as Starliner approached the station.". <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Steve_Stich" class="extiw" title="w:Steve Stich">Steve Stich</a></span>, the manager of NASA's <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Commercial_Crew_Program" class="extiw" title="w:Commercial Crew Program">Commercial Crew Program</a></span>, said they didn't know if Starliner's thrusters would fail at critical moments. The Starliner test flight was Boeing's first crewed space mission; three previous attempts for this crewed mission had failed.
</p><p>The two NASA astronauts had been launched during <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Boeing_Starliner_Crewed_Test_Flight" class="extiw" title="w:Boeing Starliner Crewed Test Flight">Starliner's crewed test flight</a></span> on June 5 for what was expected to be an eight-day long mission to the ISS. A successful mission is a requirement to certify Starliner for NASA’s Commercial Crew Program. Due to the change of plans, the astronauts would remain at ISS for several months as part of <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Expedition_71/72" class="extiw" title="w:Expedition 71/72">Expedition 71/72</a></span>.
</p><p><a href="/wiki/Boeing" class="mw-redirect" title="Boeing">Boeing</a> wrote on social media about its commitment to safety and to cooperation with NASA, as they were continuing "to focus, first and foremost, on the safety of the crew and spacecraft. We are executing the mission as determined by NASA, and we are preparing the spacecraft for a safe and successful uncrewed return."
</p><p>The Crew Dragon would carry two astronauts to the ISS instead of the planned 4 to allow room for Williams and Wilmore, NASA announced.
</p><p>NASA said that SpaceX would reportedly leave Earth in September for the planned <span class="interwiki-link-foreign"><a href="https://en.wikipedia.org/wiki/Crew-9" class="extiw" title="w:Crew-9">Crew-9</a></span> mission, with a change of plans to have only two crew on board, so that the remaining two free seats will be available for the stranded astronauts to return on Earth in February.
</p><p>SpaceX’s Crew Dragon—certified in 2020—will undergo some changes before launching on September 24. The preparation to bring Williams and Wilmore home to their families includes additional cargo, adjusting the seats for the Dragon, and Dragon-specific spacesuits for the two astronauts. The preparation also adds in a contingency plan, providing the two a flight home on the Crew-8 spacecraft.
</p><p>To date, SpaceX had completed nine crewed flights to space for NASA and several commercial flights, BBC reported.
</p>
2 changes: 2 additions & 0 deletions tags_quality_base/tags-actual/openai_llm_general/0012.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
tags = [ "aerospace", "aerospace-engineering", "aerospace-industry", "astronaut-training", "astronauts", "astronauts-return", "barry-wilmore", "boeing", "boeing-innovations", "boeing-starliner", "boeing-starliner-issues", "commercial-crew-program", "commercial-spaceflight", "crew-8", "crew-8-preparations", "crew-9", "crew-9-planning", "crew-dragon", "crew-dragon-missions", "crew-mission-challenges", "crew-missions", "crew-transport", "crew-transport-adjustments", "crew-transportation", "expedition-71-72", "expedition-71-72-operations", "government-space-programs", "helium-leak-investigation", "helium-leaks", "human-spaceflight", "international-space-collaboration", "international-space-cooperation", "international-space-station", "launch-operations", "launch-vehicle-operations", "mission-operations", "nasa", "nasa-initiatives", "national-public-radio", "new-mexico", "private-space-ventures", "robotics-in-space", "space-agencies", "space-agency-collaboration", "space-agency-initiatives", "space-agency-press-releases", "space-announcements", "space-crew-updates", "space-exploration", "space-exploration-goals", "space-exploration-initiatives", "space-exploration-news", "space-industry-advancements", "space-industry-developments", "space-innovation", "space-logistics", "space-logistics-and-support", "space-mission-analytics", "space-mission-architecture", "space-mission-communication", "space-mission-coverage", "space-mission-impacts", "space-mission-management", "space-mission-operations", "space-mission-planning", "space-mission-reports", "space-mission-safety", "space-mission-scheduling", "space-mission-technology", "space-mission-timelines", "space-mission-updates", "space-missions", "space-news", "space-policy", "space-policy-and-governance", "space-research", "space-research-and-development", "space-research-collaboration", "space-safety", "space-safety-regulations", "space-safety-standards", "space-suit-designs", "space-systems", "space-systems-engineering", 
"space-technology", "space-technology-advancements", "space-technology-development", "space-technology-trends", "space-transportation", "space-transportation-systems", "spacecraft", "spacecraft-cargo-management", "spacecraft-certification", "spacecraft-design", "spacecraft-engineering", "spacecraft-integration", "spacecraft-landing-plans", "spacecraft-recovery", "spacecraft-safety", "spacecraft-safety-protocols", "spacecraft-seating-arrangements", "spacecraft-technology", "spaceflight-education", "spaceflight-history", "spaceflight-innovation", "spaceflight-safety", "spacex", "spacex-developments", "steve-stich", "sunita-williams", "thruster-malfunction-analysis", "thruster-malfunctions",]
created_at = 2024-09-18T12:15:07.608906Z
68 changes: 68 additions & 0 deletions tags_quality_base/tags-expected/openai_llm_general/0012.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
tags_must_have = [
"aerospace",
"aerospace-engineering",
"aerospace-industry",
"astronauts",
"barry-wilmore",
"boeing",
"boeing-starliner",
"commercial-crew-program",
"commercial-spaceflight",
"crew-8",
"crew-9",
"crew-dragon",
"expedition-71-72",
"helium-leaks",
"human-spaceflight",
"launch-operations",
"nasa",
"space-agencies",
"space-announcements",
"space-exploration",
"space-innovation",
"space-logistics",
"space-mission-architecture",
"space-mission-scheduling",
"space-missions",
"space-news",
"space-safety",
"space-technology",
"spacecraft",
"spacecraft-safety",
"spacex",
"steve-stich",
"sunita-williams",
"thruster-malfunctions",]

tags_should_have = [
"astronauts-return",
"aviation-safety",
"crew-9-planning",
"crew-transport",
"government-contracting",
"mission-operations",
"mission-planning",
"national-public-radio",
"new-mexico",
"robotics",
"space-agency-collaboration",
"space-engineering",
"space-entrepreneurship",
"space-exploration-initiatives",
"space-infrastructure",
"space-mission-analytics",
"space-mission-failures",
"space-operations",
"space-reports",
"space-safety-standards",
"space-systems-engineering",
"space-transportation",
"space-transportation-systems",
"spacecraft-certification",
"spacecraft-landing-plans",
"spacecraft-operations",
"spacecraft-reliability",
"spacecraft-technology",
"spaceflight-history",
"spacesuit-designs",
]
4 changes: 2 additions & 2 deletions tags_quality_base/tags-last/openai_llm_general/0001.toml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
tags = [ "academic-writing", "artificial-intelligence", "artificial-intelligence-research", "artificial-life", "behavioral-economics", "big-data", "big-data-ethics", "book-analysis", "book-review", "business", "business-analysts", "business-intelligence", "case-study-analysis", "chaos-theory", "climate-change-prediction", "cognitive-science", "complexity-science", "computational-social-science", "critical-analysis", "cybernetics", "data-analysis", "data-analysis-techniques", "data-analysts", "data-cleaning", "data-communication", "data-driven-business", "data-driven-decision-making", "data-driven-economy", "data-driven-future", "data-driven-governance", "data-driven-innovation", "data-driven-insights", "data-driven-society", "data-driven-solutions", "data-driven-strategies", "data-driven-world", "data-ethics", "data-interpretation", "data-mining", "data-preprocessing", "data-privacy", "data-science", "data-scientists", "data-security", "data-visualization", "decision-makers", "decision-making", "decision-support-systems", "decision-theory", "disease-outbreak-prediction", "earthquake-prediction", "economic-forecasting", "economics", "economists", "election-forecasting", "environment", "feature-engineering", "finance", "financial-analysts", "financial-markets", "financial-modeling", "forecasting", "forecasting-biases", "forecasting-errors", "forecasting-methods", "fraud-detection", "game-theory", "health", "information-extraction", "information-extraction-from-data", "information-management", "information-retrieval", "information-synthesis", "information-theory", "knowledge-discovery", "knowledge-discovery-from-data", "knowledge-management", "knowledge-representation", "knowledge-synthesis", "literature-review", "machine-learning", "machine-learning-algorithms", "machine-learning-engineers", "market-research", "market-researchers", "market-trends", "meteorologists", "meteorology", "model-selection", "nate-silver", "natural-language-processing", "network-science", 
"operations-research", "operations-researchers", "overfitting", "pattern-recognition", "policy-makers", "political-science", "political-scientists", "politics", "prediction", "predictive-analytics", "predictive-maintenance", "predictive-modeling", "probability", "research", "research-methodology", "researchers", "risk-assessment", "risk-assessment-frameworks", "risk-management", "risk-management-strategies", "risk-managers", "scale", "security", "sentiment-analysis", "signal-to-noise-ratio", "social-media-analysis", "social-psychology", "social-science", "statistical-modeling", "statisticians", "statistics", "systematic-review", "systems-thinking", "technology", "terrorism-prediction", "the-signal-and-the-noise", "theoretical-framework", "time-series-analysis", "uncertainty", "uncertainty-quantification", "underfitting", "us-elections", "weather-forecasting",]
created_at = 2024-09-05T10:57:23.291918Z
tags = [ "academic-publishers", "accessibility", "accessibility-in-writing", "analytical-thinking", "analytics", "author-analysis", "basic-theses", "behavioral-economics", "bestsellers", "bestselling-authors", "bestselling-books", "book-reviews", "brief-summaries", "brigid", "business", "business-intelligence", "case-per-chapter", "case-studies", "case-study-methodology", "cognitive-bias", "cognitive-biases", "cognitive-science", "coherent-systems", "communication", "communication-studies", "communication-theory", "complexity", "complexity-theory", "contextual-analysis", "cool-facts", "critical-theory", "critical-thinking", "data-analysis", "data-analytics-firms", "data-collection", "data-driven-decision-making", "data-ethics", "data-interpretation", "data-science", "data-science-community", "data-visualization", "decision-making", "economics", "education", "environmental-science", "epistemology", "error-analysis", "errors", "errors-in-forecasting", "evidence-based-practice", "forecasting", "forecasting-accuracy", "forecasting-approaches", "forecasting-challenges", "forecasting-community", "forecasting-complexities", "forecasting-errors", "forecasting-implications", "forecasting-issues", "forecasting-methodologies", "forecasting-methods", "forecasting-misconceptions", "forecasting-practices", "forecasting-strategies", "forecasting-techniques", "forecasting-tools", "information-architecture", "information-distribution", "information-systems", "information-technology", "information-theory", "interpretation", "knowledge-management", "literary-critics", "literature", "meteorology", "methodology", "misconceptions", "nate-silver", "philosophy-of-science", "policy-analysis", "practical-application", "practical-approaches", "practical-cases", "practical-forecasting", "pragmatics", "predictive-analytics", "prisms-of-analysis", "psychology", "public-administration", "public-policy", "publications", "quantitative-analysis", "quantitative-research", "research", 
"research-institutions", "research-methodologies", "risk-analysis", "scale", "scientific-methods", "significant-tasks", "social-constructivism", "social-sciences", "sociology", "statistical-forecasting", "statistical-modeling", "statistics", "systematic-approaches", "systematic-review", "systematics", "the-signal-and-the-noise", "theoretical-approaches", "theoretical-forecasting", "theory", "theory-and-practice", "theory-building", "thesis-development", "tiendil", "us-elections", "weather-forecasting", "wikipedia",]
created_at = 2024-09-18T12:08:15.293682Z
Loading
Loading