Skip to content

Commit

Permalink
Merge pull request #241 from syou6162/minor-fixes
Browse files Browse the repository at this point in the history
細かい問題に対応する
  • Loading branch information
lufia authored Oct 27, 2022
2 parents 2daefd6 + d0c0f1d commit e2a05af
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 14 deletions.
16 changes: 11 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,36 @@ LAMBDA_SABA_DISAMBIGUATOR_RULE_NAME ?= MackerelSocialNextCron

export CGO_ENABLED := 0

.PHONY: import-pos
import-pos:
touch _pos.json pos.json && cat _pos.json pos.json | jq -r .id_str > pos_cache_ids
cat data/pos.txt | go run import_json.go pos_cache_ids | tee -a _pos.json
go run import_json.go -a _pos.json pos_cache_ids <data/pos.txt
cat _pos.json | jq --slurp --compact-output 'unique_by(.id_str) | .[]' > pos.json

.PHONY: import-neg
import-neg:
touch _neg.json neg.json && cat _neg.json neg.json | jq -r .id_str > neg_cache_ids
cat data/neg.txt | go run import_json.go neg_cache_ids | tee -a _neg.json
go run import_json.go -a _neg.json neg_cache_ids <data/neg.txt
cat _neg.json | jq --slurp --compact-output 'unique_by(.id_str) | .[]' > neg.json

# import runs the import-pos and import-neg targets through a nested make
# invocation; the leading @ keeps the make command line itself from being echoed.
.PHONY: import
import:
@make import-pos import-neg

.PHONY: clean
clean:
rm _neg.json _pos.json neg.json neg_cache_ids pos.json pos_cache_ids
rm -f _neg.json _pos.json neg.json neg_cache_ids pos.json pos_cache_ids

# learn runs train_perceptron.go with pos.json and neg.json as its two
# positional arguments.
.PHONY: learn
learn:
go run train_perceptron.go pos.json neg.json

# format rewrites the Go sources in place (-w), first with gofmt and then with
# goimports, over functions/**/*.go, lib/*.go and the top-level *.go files.
# NOTE(review): `**` only recurses when the recipe shell has globstar enabled;
# otherwise it presumably matches a single directory level -- confirm intent.
.PHONY: format
format:
gofmt -w functions/**/*.go lib/*.go *.go
goimports -w functions/**/*.go lib/*.go *.go

.PHONY: sam-package
sam-package:
cd functions/saba_disambiguator; GOARCH=amd64 GOOS=linux go build -o build/saba_disambiguator main.go
if aws s3 ls "s3://${BUCKET_NAME}" 2>&1 | grep -q 'AccessDenied'; then \
Expand All @@ -43,11 +50,10 @@ sam-package:
--s3-prefix ${S3_PREFIX} \
--output-template-file sam.yml \

# sam-deploy runs `${AWSCMD} deploy` on the sam.yml template for the stack
# named ${STACK_NAME}, passing ${LAMBDA_SABA_DISAMBIGUATOR_RULE_NAME} as the
# LambdaSabaDisambiguatorRuleName parameter override and requesting the
# CAPABILITY_IAM capability.
.PHONY: sam-deploy
sam-deploy:
${AWSCMD} deploy \
--template-file sam.yml \
--stack-name ${STACK_NAME} \
--parameter-overrides LambdaSabaDisambiguatorRuleName=${LAMBDA_SABA_DISAMBIGUATOR_RULE_NAME} \
--capabilities CAPABILITY_IAM

.PHONY: import learn sam-package sam-deploy
33 changes: 32 additions & 1 deletion import_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ package main

import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"strings"
Expand Down Expand Up @@ -47,12 +49,27 @@ func cacheIdsFromFile(filename string) (map[int64]struct{}, error) {
return cachedIds, nil
}

// flagAppend holds the value of the -a flag: the path of a file to which each
// newly fetched tweet is appended. It defaults to the empty string.
var flagAppend = flag.String("a", "", "append new tweets to `file`")

// WriteSyncer is an io.Writer that additionally exposes a Sync method
// returning an error.
type WriteSyncer interface {
io.Writer
// Sync reports any error encountered while syncing the writer.
Sync() error
}

// nopWriter is a sink that throws away everything written to it. Both of its
// methods always succeed.
type nopWriter struct{}

// Write discards p and reports all of its bytes as written.
func (w *nopWriter) Write(p []byte) (int, error) {
	written := len(p)
	return written, nil
}

// Sync does nothing and never fails.
func (w *nopWriter) Sync() error {
	return nil
}

func main() {
log.SetFlags(0)
flag.Parse()

config, err := sabadisambiguator.GetConfigFromFile("functions/saba_disambiguator/build/config.yml")
if err != nil {
log.Fatalf("failed to load config: %v\n", err)
}

svc := ssm.New(session.New(), &aws.Config{
Region: aws.String(config.Region),
})
Expand All @@ -62,11 +79,21 @@ func main() {
log.Fatalf("failed to get Twitter client: %v\n", err)
}

cachedIds, err := cacheIdsFromFile(os.Args[1])
cachedIds, err := cacheIdsFromFile(flag.Arg(0))
if err != nil {
log.Fatalf("failed to read cache: %v\n", err)
}

var w WriteSyncer = &nopWriter{}
if *flagAppend != "" {
f, err := os.OpenFile(*flagAppend, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
if err != nil {
log.Fatalf("failed to open '%s': %v\n", *flagAppend, err)
}
defer f.Close()
w = f
}

stdin := bufio.NewScanner(os.Stdin)
for stdin.Scan() {
text := stdin.Text()
Expand All @@ -87,8 +114,12 @@ func main() {

tweetJson, _ := json.Marshal(tweet)
fmt.Println(string(tweetJson))
fmt.Fprintln(w, string(tweetJson))
}
if err := stdin.Err(); err != nil {
log.Fatalln(err)
}
if err := w.Sync(); err != nil {
log.Fatalf("failed to flush tweets: %v\n", err)
}
}
4 changes: 2 additions & 2 deletions lib/example.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ type Example struct {

type Examples []*Example

func NewExample(tweet twitter.Tweet, label LabelType) *Example {
fv := ExtractFeatures(tweet)
// NewExampleWithOptions builds an Example carrying the given tweet and label.
// Its feature vector is obtained by calling ExtractFeaturesWithOptions on the
// tweet with opts.
func NewExampleWithOptions(tweet twitter.Tweet, label LabelType, opts ExtractOptions) *Example {
	return &Example{
		Label: label,
		Fv:    ExtractFeaturesWithOptions(tweet, opts),
		Tweet: tweet,
	}
}

Expand Down
6 changes: 1 addition & 5 deletions lib/feature.go
Original file line number Diff line number Diff line change
Expand Up @@ -145,18 +145,14 @@ func (opts *ExtractOptions) includeScreenNameInReplyToScreenName(t twitter.Tweet
return opts.contains(t.InReplyToScreenName)
}

// ExtractFeatures returns the feature vector that ExtractFeaturesWithOptions
// produces for t when given a zero-value ExtractOptions.
func ExtractFeatures(t twitter.Tweet) FeatureVector {
	var defaults ExtractOptions
	return ExtractFeaturesWithOptions(t, defaults)
}

func ExtractFeaturesWithOptions(t twitter.Tweet, opts ExtractOptions) FeatureVector {
var fv FeatureVector
text := t.Text

fv = append(fv, "BIAS")
fv = append(fv, "ScreenName:"+t.User.ScreenName)
fv = append(fv, "inReplyToScreenName:"+inReplyToScreenName(t))
fv = append(fv, "screenNameInQuotedStatus"+screenNameInQuotedStatus(t))
fv = append(fv, "screenNameInQuotedStatus:"+screenNameInQuotedStatus(t))
fv = append(fv, "lang:"+lang(t))
fv = append(fv, "containsMackerelInScreenName:"+strconv.FormatBool(opts.contains(t.User.ScreenName)))
fv = append(fv, "includeMackerelInUserMentions:"+strconv.FormatBool(opts.includeScreenNameInUserMentions(t)))
Expand Down
24 changes: 23 additions & 1 deletion train_perceptron.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
sabadisambiguator "github.com/syou6162/saba_disambiguator/lib"
)

// config is the package-level configuration. It is assigned in main and its
// ScreenNames are read by readExamplesFromFile when building examples.
var config *sabadisambiguator.Config

func parseLine(line string) (twitter.Tweet, error) {
var tweet twitter.Tweet
err := json.Unmarshal([]byte(line), &tweet)
Expand All @@ -36,7 +38,9 @@ func readExamplesFromFile(fileName string, label sabadisambiguator.LabelType) (s
continue
}

e := sabadisambiguator.NewExample(t, label)
e := sabadisambiguator.NewExampleWithOptions(t, label, sabadisambiguator.ExtractOptions{
ScreenNames: config.ScreenNames,
})
examples = append(examples, e)
}
if err := scanner.Err(); err != nil {
Expand All @@ -45,8 +49,26 @@ func readExamplesFromFile(fileName string, label sabadisambiguator.LabelType) (s
return examples, nil
}

// loadConfig reads the configuration from file via
// sabadisambiguator.GetConfigFromFile.
//
// When the returned error satisfies os.IsNotExist, an empty Config is
// returned with a nil error. Any other error is returned unchanged together
// with a nil Config.
func loadConfig(file string) (*sabadisambiguator.Config, error) {
	cfg, err := sabadisambiguator.GetConfigFromFile(file)
	switch {
	case err == nil:
		return cfg, nil
	case os.IsNotExist(err):
		// A missing file is not fatal: fall back to a zero-value config.
		return &sabadisambiguator.Config{}, nil
	default:
		return nil, err
	}
}

func main() {
log.SetFlags(0)

c, err := loadConfig("functions/saba_disambiguator/build/config.yml")
if err != nil {
log.Fatalf("failed to load config: %v\n", err)
}
config = c

examplesPos, err := readExamplesFromFile(os.Args[1], sabadisambiguator.POSITIVE)
if err != nil {
log.Fatalf("failed to read %s: %v\n", os.Args[1], err)
Expand Down

0 comments on commit e2a05af

Please sign in to comment.