86 changes: 31 additions & 55 deletions backend/api/crawler/v1/crawler.go
@@ -1,15 +1,40 @@
package v1

import "github.com/chaitin/panda-wiki/consts"
import (
	"github.com/chaitin/panda-wiki/consts"
	"github.com/chaitin/panda-wiki/pkg/anydoc"
)

type ScrapeReq struct {
	URL  string `json:"url" validate:"required"`
	KbID string `json:"kb_id" validate:"required"`
type CrawlerParseReq struct {
	Key           string               `json:"key"`
	KbID          string               `json:"kb_id" validate:"required"`
	CrawlerSource consts.CrawlerSource `json:"crawler_source" validate:"required"`
	Filename      string               `json:"filename"`
	FeishuSetting FeishuSetting        `json:"feishu_setting"`
}

type ScrapeResp struct {
type FeishuSetting struct {
	UserAccessToken string `json:"user_access_token"`
	AppID           string `json:"app_id"`
	AppSecret       string `json:"app_secret"`
	SpaceId         string `json:"space_id"`
}

type CrawlerParseResp struct {
	ID   string       `json:"id"`
	Docs anydoc.Child `json:"docs"`
}

type CrawlerExportReq struct {
	KbID     string `json:"kb_id" validate:"required"`
	ID       string `json:"id" validate:"required"`
	DocID    string `json:"doc_id" validate:"required"`
	SpaceId  string `json:"space_id"`
	FileType string `json:"file_type"`
}

type CrawlerExportResp struct {
	TaskId string `json:"task_id"`
	Title  string `json:"title"`
}

type CrawlerResultReq struct {
@@ -34,52 +59,3 @@ type CrawlerResultItem struct {
	Status  consts.CrawlerStatus `json:"status"`
	Content string               `json:"content"`
}

type SitemapParseReq struct {
	URL string `json:"url" validate:"required"`
}

type SitemapParseResp struct {
	ID   string             `json:"id"`
	List []SitemapParseItem `json:"list"`
}

type SitemapParseItem struct {
	URL   string `json:"url"`
	Title string `json:"title"`
}

type SitemapScrapeReq struct {
	KbID string `json:"kb_id" validate:"required"`
	ID   string `json:"id" validate:"required"`
	URL  string `json:"url" validate:"required"`
}

type SitemapScrapeResp struct {
	Content string `json:"content"`
}

type RssParseReq struct {
	URL string `json:"url" validate:"required"`
}

type RssParseResp struct {
	ID   string         `json:"id"`
	List []RssParseItem `json:"list"`
}

type RssParseItem struct {
	URL   string `json:"url"`
	Title string `json:"title"`
	Desc  string `json:"desc"`
}

type RssScrapeReq struct {
	KbID string `json:"kb_id" validate:"required"`
	ID   string `json:"id" validate:"required"`
	URL  string `json:"url" validate:"required"`
}

type RssScrapeResp struct {
	Content string `json:"content"`
}
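
Taken together, these types replace the per-source Scrape/SitemapScrape/RssScrape round trips with one three-step flow: parse a source into a document listing, export a chosen document as an asynchronous task, then poll the task via the existing result endpoint. Below is a minimal client-side sketch of that flow; the endpoint paths and the postJSON helper are assumptions for illustration, since the handler wiring is not part of this diff.

package crawlsketch

import (
	"bytes"
	"encoding/json"
	"net/http"

	v1 "github.com/chaitin/panda-wiki/api/crawler/v1"
	"github.com/chaitin/panda-wiki/consts"
)

// postJSON posts a JSON-encoded request body and decodes the JSON
// response into out. Defined here only to keep the sketch self-contained.
func postJSON(url string, in, out any) error {
	body, err := json.Marshal(in)
	if err != nil {
		return err
	}
	resp, err := http.Post(url, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	return json.NewDecoder(resp.Body).Decode(out)
}

// crawlOne walks the parse -> export flow for a plain URL source.
// docID would normally be picked out of parsed.Docs; it is passed in
// here because the shape of anydoc.Child is not shown in this diff.
func crawlOne(base, kbID, url, docID string) (taskID string, err error) {
	var parsed v1.CrawlerParseResp
	if err = postJSON(base+"/api/v1/crawler/parse", v1.CrawlerParseReq{
		Key:           url,
		KbID:          kbID,
		CrawlerSource: consts.CrawlerSourceUrl,
	}, &parsed); err != nil {
		return "", err
	}

	var exported v1.CrawlerExportResp
	if err = postJSON(base+"/api/v1/crawler/export", v1.CrawlerExportReq{
		KbID:  kbID,
		ID:    parsed.ID,
		DocID: docID,
	}, &exported); err != nil {
		return "", err
	}

	// exported.TaskId feeds the existing CrawlerResultReq polling loop.
	return exported.TaskId, nil
}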
42 changes: 42 additions & 0 deletions backend/consts/parse.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package consts

type CrawlerSource string

const (
	// Key- or URL-style sources: sent directly to the parse API.
	CrawlerSourceUrl     CrawlerSource = "url"
	CrawlerSourceRSS     CrawlerSource = "rss"
	CrawlerSourceSitemap CrawlerSource = "sitemap"
	CrawlerSourceNotion  CrawlerSource = "notion"
	CrawlerSourceFeishu  CrawlerSource = "feishu"

	// File-style sources: the file must first be uploaded via the upload API.
	CrawlerSourceFile       CrawlerSource = "file"
	CrawlerSourceEpub       CrawlerSource = "epub"
	CrawlerSourceYuque      CrawlerSource = "yuque"
	CrawlerSourceSiyuan     CrawlerSource = "siyuan"
	CrawlerSourceMindoc     CrawlerSource = "mindoc"
	CrawlerSourceWikijs     CrawlerSource = "wikijs"
	CrawlerSourceConfluence CrawlerSource = "confluence"
)

type CrawlerSourceType string

const (
	CrawlerSourceTypeFile CrawlerSourceType = "file"
	CrawlerSourceTypeUrl  CrawlerSourceType = "url"
	CrawlerSourceTypeKey  CrawlerSourceType = "key"
)

func (c CrawlerSource) Type() CrawlerSourceType {
	switch c {
	case CrawlerSourceNotion, CrawlerSourceFeishu:
		return CrawlerSourceTypeKey
	case CrawlerSourceUrl, CrawlerSourceRSS, CrawlerSourceSitemap:
		return CrawlerSourceTypeUrl
	case CrawlerSourceFile, CrawlerSourceEpub, CrawlerSourceYuque, CrawlerSourceSiyuan, CrawlerSourceMindoc, CrawlerSourceWikijs, CrawlerSourceConfluence:
		return CrawlerSourceTypeFile
	default:
		return ""
	}
}
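
The Type() method lets callers branch on how a source's payload arrives (key, URL, or uploaded file) instead of switching on every individual source. A minimal usage sketch follows; the describeInput helper and its wording are illustrative assumptions, not taken from the handler code.

package main

import (
	"fmt"

	"github.com/chaitin/panda-wiki/consts"
)

// describeInput reports what a parse request must carry for a given
// source, mirroring the grouping in consts.CrawlerSource.Type().
func describeInput(src consts.CrawlerSource) string {
	switch src.Type() {
	case consts.CrawlerSourceTypeKey:
		return "expects a platform credential (e.g. Notion/Feishu token)"
	case consts.CrawlerSourceTypeUrl:
		return "expects a URL, parsed directly"
	case consts.CrawlerSourceTypeFile:
		return "expects a previously uploaded file; call the upload API first"
	default:
		return "unknown source"
	}
}

func main() {
	for _, src := range []consts.CrawlerSource{
		consts.CrawlerSourceFeishu,
		consts.CrawlerSourceSitemap,
		consts.CrawlerSourceEpub,
	} {
		fmt.Printf("%-8s -> %s\n", src, describeInput(src))
	}
}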