-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
137 lines (126 loc) · 3.31 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
package main
import (
"os"
"runtime"
"path/filepath"
"sync"
"fmt"
"fshash/cfg"
"time"
"log"
"crypto/sha1"
"io"
"bufio"
"encoding/hex"
)
// Wait groups used to sequence shutdown.
//
// taskWg counts the directory walker, every hashing worker and every entry
// queued on visitDataChan. recordWg counts the recordHash writer.
var (
	taskWg   sync.WaitGroup
	recordWg sync.WaitGroup
)
// main hashes the files found under *cfg.RootDir and records the results in
// *cfg.Out.
//
// Pipeline: one goroutine walks the tree and feeds visitDataChan, one taskItem
// worker per CPU turns queued entries into hashValuesChan results, and a
// single recordHash goroutine drains hashValuesChan into the output file.
func main() {
	log.Printf("开始处理文件夹%s filter:%s", *cfg.RootDir, *cfg.Filter)
	numCpu := runtime.NumCPU()
	runtime.GOMAXPROCS(numCpu)
	stime := time.Now()

	// Register the walker before starting it, so the counter already includes
	// it by the time anything can call taskWg.Wait().
	taskWg.Add(1)
	go func() {
		defer taskWg.Done()
		// Closing the queue is what lets the taskItem workers leave their
		// range loop. Deferred calls run last-in-first-out, so the channel is
		// closed before the walker's count is released.
		defer close(visitDataChan)
		if err := filepath.Walk(*cfg.RootDir, walkFunc); err != nil {
			log.Println(err)
		}
	}()

	for i := 0; i < numCpu; i++ {
		taskWg.Add(1)
		go taskItem()
	}

	// Register the writer here instead of relying on recordHash to do it.
	// recordHash only touches recordWg once it is already running, so without
	// this count recordWg.Wait() below could return before the writer had even
	// started and the process would exit with an empty or truncated output
	// file. The wrapper releases the count only after recordHash has fully
	// returned, i.e. after its summary line and deferred cleanup.
	recordWg.Add(1)
	go func() {
		defer recordWg.Done()
		recordHash(*cfg.Out)
	}()

	taskWg.Wait()
	// Every hashing worker has finished, so nothing sends on hashValuesChan
	// anymore; closing it lets recordHash leave its loop.
	close(hashValuesChan)
	// Block until recordHash has finished.
	recordWg.Wait()
	log.Printf("文件夹处理完成,花费时间%f秒", time.Since(stime).Seconds())
}
// visitData is the item walkFunc sends through visitDataChan: a visited path
// together with its file info.
type visitData struct {
	path string      // path handed to walkFunc
	info os.FileInfo // file info handed to walkFunc for that path
}

// visitDataChan queues walked entries for the taskItem workers. It is buffered
// for 3000 entries.
var visitDataChan = make(chan *visitData, 3000)
// walkFunc is the filepath.Walk callback: it queues every visited entry that
// is not excluded by *cfg.Filter on visitDataChan.
//
// When *cfg.Filter is non-empty it is used as a filepath.Match pattern:
//   - if it matches the entry's parent directory, filepath.SkipDir is returned;
//   - if it matches the entry's base name, the entry is silently dropped.
//
// Each queued entry adds one to taskWg; the worker that receives the entry is
// expected to release it.
//
// A non-nil incoming err, or a malformed filter pattern, is returned as is,
// which makes filepath.Walk stop.
func walkFunc(fpath string, info os.FileInfo, err error) error {
	// Check the walk error first: info may be nil when err is set, and the
	// error must not be masked by the result of the pattern matching below.
	if err != nil {
		return err
	}

	if pattern := *cfg.Filter; pattern != "" {
		dir := filepath.Dir(fpath)

		skipDir, matchErr := filepath.Match(pattern, dir)
		if matchErr != nil {
			return matchErr
		}
		if skipDir {
			log.Println("skip dir:", dir, info.Name())
			// SkipDir skips the current directory including its
			// subdirectories when fpath is a directory, and the remaining
			// files of the containing directory when fpath is a file.
			return filepath.SkipDir
		}

		skipFile, matchErr := filepath.Match(pattern, info.Name())
		if matchErr != nil {
			return matchErr
		}
		if skipFile {
			log.Println("skip file:", fpath)
			return nil
		}
	}

	taskWg.Add(1)
	visitDataChan <- &visitData{path: fpath, info: info}
	return nil
}
func taskItem(){
for vdata := range visitDataChan {
//排除链接符号/文件夹 等文件: ModeDir | ModeSymlink | ModeNamedPipe | ModeSocket | ModeDevice
if vdata.info.Mode().IsRegular() && vdata.info.Size()>0{
sh1:=sha1.New()
filein, err := os.Open(vdata.path);
if err != nil {
log.Println("文件打开错误",err, vdata.path)
continue
}
blen,err:=io.Copy(sh1,filein)
filein.Close()
if err==nil{
//line:=fmt.Sprintf("%s %s %d\n",vdata.path,hex.EncodeToString(sh1.Sum(nil)),blen)
hashValuesChan<-&hashData{path:vdata.path,sum:hex.EncodeToString(sh1.Sum(nil)),size:blen}
//log.Printf(line)
}else{
log.Println("err",err,vdata.path)
}
}
taskWg.Add(-1)
}
taskWg.Add(-1)
}
// hashData is the result of hashing one file.
type hashData struct {
	path string // path of the hashed file
	sum  string // hex-encoded hash sum
	size int64  // number of bytes that were hashed
}

// hashValuesChan carries finished hash results (file path, hash sum, file
// size) to the writer. It is buffered for 2000 results.
var hashValuesChan = make(chan *hashData, 2000)
// recordHash drains hashValuesChan into outFile, writing one
// "path,sum,size" line per result, and logs the number of files and their
// cumulative size once the channel has been closed.
//
// It holds one recordWg count for its whole run, released only after the
// summary has been logged and the file has been closed. If outFile cannot be
// created the process exits with status 1.
func recordHash(outFile string) {
	recordWg.Add(1)
	defer recordWg.Done()

	out, err := os.Create(outFile)
	if err != nil {
		log.Printf("outFile err: %v", err)
		os.Exit(1)
	}
	// Registered only after the error check so Close is never called on a nil
	// file. It runs before the deferred recordWg.Done above.
	defer func() {
		if err := out.Close(); err != nil {
			log.Printf("outFile close err: %v", err)
		}
	}()

	// bufio.Writer buffers 4096 bytes by default; use bufio.NewWriterSize for
	// a different buffer size.
	fwriter := bufio.NewWriter(out)

	var fSizeCounter, fCount int64
	for hdata := range hashValuesChan {
		// A bufio.Writer remembers its first write error and reports it again
		// from Flush, so the error is checked once after the loop. The loop
		// keeps draining regardless, otherwise the hashing workers would block
		// on a full hashValuesChan.
		fmt.Fprintf(fwriter, "%s,%s,%d\n", hdata.path, hdata.sum, hdata.size)
		fSizeCounter += hdata.size
		fCount++
	}

	if err := fwriter.Flush(); err != nil {
		log.Printf("outFile write err: %v", err)
	}

	// float64 keeps the byte total exact well beyond what float32 can hold.
	log.Printf("共处理文件数量%d ,累计大小:%f兆", fCount, float64(fSizeCounter)/1024/1024)
}