Skip to content

Commit

Permalink
Merge pull request #183 from boudicca-events/abl/metal-collectors
Browse files Browse the repository at this point in the history
Abl/metal collectors
  • Loading branch information
kadhonn authored Nov 4, 2023
2 parents a53f371 + 0d76017 commit a79855e
Show file tree
Hide file tree
Showing 4 changed files with 163 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import base.boudicca.api.eventcollector.EventCollectorScheduler
import events.boudicca.eventcollector.collectors.*

fun main() {
Thread.sleep(10000) // let eventdb startup first when both are deployed.... we should do a better thing here
EventCollectorScheduler()
.startWebUi()
.addEventCollector(LinzTermineCollector())
Expand All @@ -29,5 +28,7 @@ fun main() {
.addEventCollector(StiftskonzerteCollector())
.addEventCollector(GewaexhausCollector())
.addEventCollector(OehJkuCollector())
.addEventCollector(ArenaWienCollector())
.addEventCollector(ViperRoomCollector())
.run()
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,7 @@ fun main() {
// .debug(UlfOoeCollector())
// .debug(StiftskonzerteCollector())
// .debug(GewaexhausCollector())
.debug(OehJkuCollector())
}
// .debug(OehJkuCollector())
// .debug(ArenaWienCollector())
.debug(ViperRoomCollector())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package events.boudicca.eventcollector.collectors

import base.boudicca.SemanticKeys
import base.boudicca.api.eventcollector.Fetcher
import base.boudicca.api.eventcollector.TwoStepEventCollector
import base.boudicca.model.Event
import com.beust.klaxon.JsonObject
import com.beust.klaxon.Parser
import org.jsoup.Jsoup
import java.io.StringReader
import java.time.LocalDateTime
import java.time.ZoneId
import java.time.format.DateTimeFormatter

/**
 * Collects concert events from the Arena Wien website.
 *
 * Step one pages through the site's JSON search endpoint and records one [HalfEvent] per listed concert.
 * Step two fetches each event's detail page to extract the description and picture.
 */
class ArenaWienCollector : TwoStepEventCollector<ArenaWienCollector.HalfEvent>("arenawien") {

    private val fetcher = Fetcher()
    private val jsonParser = Parser.default()

    /**
     * Fetches page 0 of the program list, then pages `1..maxPage` as announced by page 0,
     * and returns the concerts of all fetched pages in order.
     *
     * @throws IllegalStateException if a response lacks the `maxPage` or `concerts` field.
     */
    override fun getAllUnparsedEvents(): List<HalfEvent> {
        val halfEvents = mutableListOf<HalfEvent>()
        val firstPage = getProgramList(0)
        halfEvents.addAll(getAllUrls(firstPage))

        val maxPage = firstPage.int("maxPage")
            ?: error("arena.wien search response has no 'maxPage' field")
        // NOTE(review): assumes 'maxPage' is the highest zero-based page index; if it is a page count
        // this requests one page past the end -- confirm against the API.
        for (page in 1..maxPage) {
            halfEvents.addAll(getAllUrls(getProgramList(page)))
        }

        return halfEvents
    }

    /**
     * Builds the final [Event] from the listing data in [event] plus the event's detail page.
     *
     * Start and end date are both read as Vienna wall-clock time. The end date is written in
     * ISO offset format so that it carries the same offset information as the start date
     * instead of the raw, offset-less string delivered by the API.
     *
     * @throws IllegalStateException if the listing entry has no title or no start date.
     */
    override fun parseEvent(event: HalfEvent): Event {
        val eventSite = Jsoup.parse(fetcher.fetchUrl(event.url))

        val title = event.title ?: error("arena.wien event ${event.url} has no title")
        val dateBegin = event.dateBegin ?: error("arena.wien event ${event.url} has no start date")
        val startDate = parseViennaDateTime(dateBegin)

        val data = mutableMapOf<String, String>()
        event.dateEnd?.takeIf { it.isNotBlank() }?.let { dateEnd ->
            data[SemanticKeys.ENDDATE] =
                parseViennaDateTime(dateEnd).format(DateTimeFormatter.ISO_OFFSET_DATE_TIME)
        }
        data[SemanticKeys.URL] = event.url
        data[SemanticKeys.TYPE] = "concert"
        data[SemanticKeys.DESCRIPTION] = eventSite.select("div.suite_VAdescr").text()

        // The picture is optional; its src is appended to the site root as-is.
        eventSite.select("div.suite_imageContainer img").first()?.let { img ->
            data[SemanticKeys.PICTUREURL] = "https://arena.wien/" + img.attr("src")
        }

        data[SemanticKeys.LOCATION_NAME] = "Arena Wien"

        return Event(title, startDate, data)
    }

    /** Parses an ISO date-time string and attaches the offset valid in Europe/Vienna at that local time. */
    private fun parseViennaDateTime(text: String) =
        LocalDateTime.parse(text, DateTimeFormatter.ISO_DATE_TIME)
            .atZone(ZoneId.of("Europe/Vienna"))
            .toOffsetDateTime()

    /** Fetches one page of the search endpoint and parses the response body as a JSON object. */
    private fun getProgramList(i: Int): JsonObject {
        return jsonParser.parse(StringReader(fetcher.fetchUrl(getAjaxUrl(i)))) as JsonObject
    }

    /**
     * Maps every entry of the `concerts` array to a [HalfEvent].
     *
     * @throws IllegalStateException if the `concerts` array or an entry's `DetailUrl` is missing.
     */
    private fun getAllUrls(jsonObject: JsonObject): Collection<HalfEvent> {
        val concerts = jsonObject.array<JsonObject>("concerts")
            ?: error("arena.wien search response has no 'concerts' array")
        return concerts.map { concert ->
            HalfEvent(
                url = concert.string("DetailUrl") ?: error("arena.wien concert entry has no 'DetailUrl'"),
                dateBegin = concert.string("DateBegin"),
                dateEnd = concert.string("DateEnd"),
                location = concert.string("Location"),
                title = concert.string("Title"),
            )
        }
    }

    /** Builds the search endpoint URL for the given page, with a fixed page size of 20. */
    private fun getAjaxUrl(page: Int): String {
        return "https://arena.wien/DesktopModules/WebAPI/API/Event/Search?searchTerm=&day=1&month=-1&year=-1&page=${page}&pageSize=20&eventCategory=-1&abonnement=-1&cultureCode=de-AT&locationId=0"
    }

    /** Listing data of a single concert as delivered by the search endpoint; only [url] is mandatory. */
    data class HalfEvent(
        val url: String,
        val dateBegin: String?,
        val dateEnd: String?,
        val location: String?,
        val title: String?,
    )
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package events.boudicca.eventcollector.collectors

import base.boudicca.SemanticKeys
import base.boudicca.api.eventcollector.Fetcher
import base.boudicca.api.eventcollector.TwoStepEventCollector
import base.boudicca.model.Event
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import java.time.LocalDate
import java.time.LocalTime
import java.time.OffsetDateTime
import java.time.ZoneId
import java.time.format.DateTimeFormatter
import java.util.*

/**
 * Collects concert events from the Viper Room website.
 *
 * Step one reads the event overview page and returns the detail-page link of every listed event.
 * Step two parses each detail page into an [Event].
 */
class ViperRoomCollector : TwoStepEventCollector<String>("viperroom") {

    private val fetcher = Fetcher()

    // Formatters are immutable, so they are built once instead of on every parsed event.
    private val dateFormatter = DateTimeFormatter.ofPattern("dd.MM.uuuu", Locale.GERMAN)

    // "HH" is hour-of-day 0-23. The previous "kk" is clock-hour-of-day 1-24 and rejects times such as 00:30.
    private val doorsTimeFormatter = DateTimeFormatter.ofPattern("HH:mm")

    /** Returns the href of the first action link of every entry in the events list. */
    override fun getAllUnparsedEvents(): List<String> {
        val eventsList = Jsoup.parse(fetcher.fetchUrl("https://www.viper-room.at/veranstaltungen"))

        return eventsList.select("ul.events_list div.event_actions a:nth-child(1)")
            .map { it.attr("href") }
    }

    /**
     * Fetches the detail page at [event] and extracts name, start date, description and picture.
     *
     * @throws IllegalStateException if the page lacks the event container or the date information.
     */
    override fun parseEvent(event: String): Event {
        val eventSite = Jsoup.parse(fetcher.fetchUrl(event))

        val name = eventSite.select("h1.entry-title").text()
        val startDate = parseDate(eventSite)

        val container = eventSite.select("div#em-event-6").first()
            ?: error("viper-room event page $event has no 'div#em-event-6' container")

        val data = mutableMapOf<String, String>()
        data[SemanticKeys.URL] = event
        data[SemanticKeys.TYPE] = "concert"
        data[SemanticKeys.DESCRIPTION] = container.children()
            .filter { isDescriptionElement(it) }
            .map { it.text() }
            .filter { it.isNotBlank() }
            .joinToString("\n")

        // The picture is optional.
        eventSite.select("div#em-event-6 p img").first()?.let { img ->
            data[SemanticKeys.PICTUREURL] = img.attr("src")
        }

        data[SemanticKeys.LOCATION_NAME] = "Viper Room"

        return Event(name, startDate, data)
    }

    /**
     * Decides whether a direct child of the event container belongs to the description:
     * any `div` except the price and actions boxes, and any `p` except the time line.
     */
    private fun isDescriptionElement(element: Element): Boolean {
        val classes = element.classNames()
        return when (element.tagName()) {
            "div" -> !(classes.contains("event_price") || classes.contains("event_actions"))
            "p" -> !classes.contains("event_time")
            else -> false
        }
    }

    /**
     * Combines the date from `p.event_time` with the doors time from `span.event_doors`
     * and interprets the result as Europe/Vienna local time.
     *
     * The date is the part after the first ", " of the first text node (format `dd.MM.uuuu`);
     * the time is what remains after stripping the "Doors open " prefix (format `HH:mm`).
     *
     * @throws IllegalStateException if the time line is missing or does not contain ", ".
     */
    private fun parseDate(event: Element): OffsetDateTime {
        val fullDateText = event.select("p.event_time").textNodes().firstOrNull()?.text()
            ?: error("viper-room event page has no text in 'p.event_time'")
        val dateText = fullDateText.split(", ").getOrNull(1)?.trim()
            ?: error("unexpected viper-room date text '$fullDateText'")

        val fullTimeText = event.select("span.event_doors").text()
        val timeText = fullTimeText.removePrefix("Doors open ").trim()

        val localDate = LocalDate.parse(dateText, dateFormatter)
        val localTime = LocalTime.parse(timeText, doorsTimeFormatter)

        return localDate.atTime(localTime).atZone(ZoneId.of("Europe/Vienna")).toOffsetDateTime()
    }
}

0 comments on commit a79855e

Please sign in to comment.