Skip to content

Commit bbef07f

Browse files
authored
Merge pull request #1388 from coltea/feat-doc-tree
feat doc tree
2 parents fd28505 + 8416484 commit bbef07f

File tree

22 files changed

+7153
-11760
lines changed

22 files changed

+7153
-11760
lines changed

backend/api/crawler/v1/crawler.go

Lines changed: 31 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,40 @@
11
package v1
22

3-
import "github.com/chaitin/panda-wiki/consts"
3+
import (
4+
"github.com/chaitin/panda-wiki/consts"
5+
"github.com/chaitin/panda-wiki/pkg/anydoc"
6+
)
47

5-
// CrawlerParseReq is the request body for parsing a document source prior
// to import. Depending on CrawlerSource.Type() the source is addressed by
// key, by URL, or by a previously uploaded file — NOTE(review): which
// fields each source actually reads should be confirmed in the handler.
type CrawlerParseReq struct {
	// Key addresses the source document (token/URL/uploaded-file key,
	// depending on the crawler source).
	Key string `json:"key"`
	// KbID is the target knowledge base; required.
	KbID string `json:"kb_id" validate:"required"`
	// CrawlerSource selects the importer (url, rss, feishu, ...); required.
	CrawlerSource consts.CrawlerSource `json:"crawler_source" validate:"required"`
	// Filename is the original name of an uploaded file, when applicable.
	Filename string `json:"filename"`
	// FeishuSetting carries Feishu credentials; presumably only consulted
	// when CrawlerSource is feishu — TODO confirm against the handler.
	FeishuSetting FeishuSetting `json:"feishu_setting"`
}
915

10-
// FeishuSetting holds the credentials used to access the Feishu (Lark)
// open platform when crawling Feishu documents.
type FeishuSetting struct {
	// UserAccessToken authorizes requests on behalf of an end user.
	UserAccessToken string `json:"user_access_token"`
	// AppID and AppSecret identify the Feishu application.
	AppID     string `json:"app_id"`
	AppSecret string `json:"app_secret"`
	// SpaceId is the wiki space to crawl. NOTE(review): Go convention
	// would be SpaceID, but renaming the exported field would break callers.
	SpaceId string `json:"space_id"`
}
22+
23+
// CrawlerParseResp is the result of a parse call: an identifier for the
// parse session plus the discovered document tree.
type CrawlerParseResp struct {
	// ID identifies this parse result for subsequent export requests.
	ID string `json:"id"`
	// Docs is the root of the parsed document tree (see pkg/anydoc).
	Docs anydoc.Child `json:"docs"`
}
27+
28+
// CrawlerExportReq requests export of a single document discovered by a
// previous parse call.
type CrawlerExportReq struct {
	// KbID is the target knowledge base; required.
	KbID string `json:"kb_id" validate:"required"`
	// ID is the parse result ID returned in CrawlerParseResp; required.
	ID string `json:"id" validate:"required"`
	// DocID selects the document within the parsed tree; required.
	DocID string `json:"doc_id" validate:"required"`
	// SpaceId is optional; presumably only meaningful for Feishu-style
	// sources — TODO confirm against the handler.
	SpaceId string `json:"space_id"`
	// FileType optionally pins the exported file format.
	FileType string `json:"file_type"`
}
35+
36+
// CrawlerExportResp returns the identifier of the asynchronous export
// task; presumably polled via the crawler result endpoint — TODO confirm.
type CrawlerExportResp struct {
	// TaskId identifies the background export task.
	TaskId string `json:"task_id"`
}
1439

1540
type CrawlerResultReq struct {
@@ -34,52 +59,3 @@ type CrawlerResultItem struct {
3459
Status consts.CrawlerStatus `json:"status"`
3560
Content string `json:"content"`
3661
}
37-
38-
type SitemapParseReq struct {
39-
URL string `json:"url" validate:"required"`
40-
}
41-
42-
type SitemapParseResp struct {
43-
ID string `json:"id"`
44-
List []SitemapParseItem `json:"list"`
45-
}
46-
47-
type SitemapParseItem struct {
48-
URL string `json:"url"`
49-
Title string `json:"title"`
50-
}
51-
52-
type SitemapScrapeReq struct {
53-
KbID string `json:"kb_id" validate:"required"`
54-
ID string `json:"id" validate:"required"`
55-
URL string `json:"url" validate:"required"`
56-
}
57-
58-
type SitemapScrapeResp struct {
59-
Content string `json:"content"`
60-
}
61-
62-
type RssParseReq struct {
63-
URL string `json:"url" validate:"required"`
64-
}
65-
66-
type RssParseResp struct {
67-
ID string `json:"id"`
68-
List []RssParseItem `json:"list"`
69-
}
70-
71-
type RssParseItem struct {
72-
URL string `json:"url"`
73-
Title string `json:"title"`
74-
Desc string `json:"desc"`
75-
}
76-
77-
type RssScrapeReq struct {
78-
KbID string `json:"kb_id" validate:"required"`
79-
ID string `json:"id" validate:"required"`
80-
URL string `json:"url" validate:"required"`
81-
}
82-
83-
type RssScrapeResp struct {
84-
Content string `json:"content"`
85-
}

backend/consts/parse.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package consts
2+
3+
// CrawlerSource identifies where imported documents come from.
type CrawlerSource string

const (
	// Key- or URL-addressed sources: these go directly to the parse endpoint.
	CrawlerSourceUrl     CrawlerSource = "url"
	CrawlerSourceRSS     CrawlerSource = "rss"
	CrawlerSourceSitemap CrawlerSource = "sitemap"
	CrawlerSourceNotion  CrawlerSource = "notion"
	CrawlerSourceFeishu  CrawlerSource = "feishu"

	// File-based sources: a file must first be uploaded via the upload
	// endpoint before it can be parsed.
	CrawlerSourceFile       CrawlerSource = "file"
	CrawlerSourceEpub       CrawlerSource = "epub"
	CrawlerSourceYuque      CrawlerSource = "yuque"
	CrawlerSourceSiyuan     CrawlerSource = "siyuan"
	CrawlerSourceMindoc     CrawlerSource = "mindoc"
	CrawlerSourceWikijs     CrawlerSource = "wikijs"
	CrawlerSourceConfluence CrawlerSource = "confluence"
)

// CrawlerSourceType classifies how a CrawlerSource is addressed.
type CrawlerSourceType string

const (
	CrawlerSourceTypeFile CrawlerSourceType = "file"
	CrawlerSourceTypeUrl  CrawlerSourceType = "url"
	CrawlerSourceTypeKey  CrawlerSourceType = "key"
)

// crawlerSourceTypes maps every known source onto its addressing type.
// Sources absent from the map yield the zero value "", mirroring the
// unknown-source behavior of Type.
var crawlerSourceTypes = map[CrawlerSource]CrawlerSourceType{
	CrawlerSourceNotion:     CrawlerSourceTypeKey,
	CrawlerSourceFeishu:     CrawlerSourceTypeKey,
	CrawlerSourceUrl:        CrawlerSourceTypeUrl,
	CrawlerSourceRSS:        CrawlerSourceTypeUrl,
	CrawlerSourceSitemap:    CrawlerSourceTypeUrl,
	CrawlerSourceFile:       CrawlerSourceTypeFile,
	CrawlerSourceEpub:       CrawlerSourceTypeFile,
	CrawlerSourceYuque:      CrawlerSourceTypeFile,
	CrawlerSourceSiyuan:     CrawlerSourceTypeFile,
	CrawlerSourceMindoc:     CrawlerSourceTypeFile,
	CrawlerSourceWikijs:     CrawlerSourceTypeFile,
	CrawlerSourceConfluence: CrawlerSourceTypeFile,
}

// Type reports how documents of source c are addressed: by key, by URL,
// or by an uploaded file. Unknown sources return the empty string.
func (c CrawlerSource) Type() CrawlerSourceType {
	return crawlerSourceTypes[c]
}

0 commit comments

Comments
 (0)