Skip to content

Commit

Permalink
feat: 基于 grpc 的插件系统
Browse files Browse the repository at this point in the history
  • Loading branch information
WAAutoMaton committed Oct 3, 2019
1 parent 543e2f0 commit d062908
Show file tree
Hide file tree
Showing 9 changed files with 335 additions and 170 deletions.
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,9 @@
crawler
config/
log.txt
*.pb.go
api_pb2_grpc.py
api_pb2.py
*.swp
*.swo

25 changes: 7 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,22 +1,11 @@
build: crawler plugin/loj.so plugin/bzoj.so plugin/uoj.so plugin/guoj.so plugin/cogs.so plugin/seuoj.so
build: crawler plugin/uoj/uoj
clean:
rm plugin/*.so
crawler: main.go plugin/public/tools.go
rm crawler rpc/api.pb.go plugin/uoj/uoj
crawler: main.go plugin/public/tools.go rpc/api.pb.go
go build ./
plugin/loj.so: plugin/loj/loj.go plugin/public/tools.go plugin/syzoj/main.go
go build -buildmode=plugin -o ./plugin/loj.so ./plugin/loj/
plugin/seuoj.so: plugin/seuoj/seuoj.go plugin/public/tools.go plugin/syzoj/main.go
go build -buildmode=plugin -o ./plugin/seuoj.so ./plugin/seuoj/
plugin/bzoj.so: plugin/bzoj/bzoj.go plugin/public/tools.go
go build -buildmode=plugin -o ./plugin/bzoj.so ./plugin/bzoj/
plugin/uoj.so: plugin/uoj/uoj.go plugin/public/tools.go
go build -buildmode=plugin -o ./plugin/uoj.so ./plugin/uoj/
plugin/guoj.so: plugin/guoj/guoj.go plugin/public/tools.go plugin/syzoj/main.go
go build -buildmode=plugin -o ./plugin/guoj.so ./plugin/guoj/
plugin/tsinsen.so: plugin/tsinsen/tsinsen.go plugin/public/tools.go
go build -buildmode=plugin -o ./plugin/tsinsen.so ./plugin/tsinsen/
plugin/cogs.so: plugin/cogs/cogs.go plugin/public/tools.go
go build -buildmode=plugin -o ./plugin/cogs.so ./plugin/cogs/

rpc/api.pb.go: rpc/api.proto rpc/gen.go
go generate rpc/gen.go
plugin/uoj/uoj: plugin/uoj/uoj.go plugin/public/tools.go rpc/api.pb.go
go build -o ./plugin/uoj/uoj ./plugin/uoj/
.PHONY: build
.IGNORE: clean
47 changes: 30 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,56 @@

本项目为 OI-Archive 的题库爬虫。

项目由一个主服务和若干组件构成,每个组件负责一个题库。主服务与各组件之间通过 gRPC 通信。


## 编译运行

## 编译

首先安装 protobuf

```shell
sudo apt install golang-go
go get github.com/oi-archive/crawler
cd ~/go/src/github.com/oi-archive/crawler
make
./crawler
```



## 插件 API
### 运行

* 启动主服务 `./crawler`
* 分别运行 `plugin` 目录中的所有组件



每个题库的爬虫都以插件的形式被爬虫主程序调用,具体的格式如下
## 开发指南

#### Go
主服务提供的 API 见 `rpc/api.proto` (相信大家都能看懂 protobuf 文件,即使看不懂也没关系,可以看下面的各语言示例)

对于 go 语言,需要实现以下接口,然后以 plugin 模式编译,即可正常被主程序调用。
#### Go

```go
func Name() string // 返回题库名称
func Start(logg *log.Logger) // 在且仅在插件初始化时被调用一次
将 `plugin/example-go` 复制一份,然后在标记了 `TODO: ` 的位置编写你的代码。

// 每次主程序要求爬虫进行一次更新时会被调用
// limit: 主程序希望爬虫这一次爬取的题目数量(非严格要求,爬虫可以自行决定到底爬几题)
// public.Filelist: map[string][]byte 类型,表示这次更新的文件列表,key表示文件的完整路径名,value表示文件内容
// error: 本次爬虫运行是否出现致命错误。若非空则主程序将忽略这次爬取的结果。
func Update(limit int) (public.FileList, error)
### Python3

func Stop() // 可能在插件关闭时被调用
环境准备:

```shell
pip3 install grpcio
pip3 install grpcio-tools
pip3 install apscheduler
```

将 `plugin/example-python` 复制一份,进入新的目录,然后执行以下命令生成 gRPC 代码:

```shell
python3 -m grpc_tools.protoc -I../../rpc/ --python_out=. --grpc_python_out=. ../../rpc/api.proto
```

然后在标记了 `TODO: ` 的位置编写你的代码。

#### 其他语言
### 其他语言

对于其他语言,你只需要用你喜欢的方式实现上面的几个接口,然后导出为 C 语言格式的库文件即可
如果需要用其他语言开发爬虫,请联系 @WAAutoMaton 获取技术支持
170 changes: 56 additions & 114 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
package main

import (
"context"
"encoding/json"
"fmt"
. "github.com/oi-archive/crawler/plugin/public"
"github.com/robfig/cron"
"github.com/golang/protobuf/ptypes/empty"
"github.com/oi-archive/crawler/rpc"
"google.golang.org/grpc"
"google.golang.org/grpc/reflection"
"gopkg.in/libgit2/git2go.v26"
"io"
"io/ioutil"
"log"
"os"
"path/filepath"
"net"
"plugin"
"time"
)
Expand Down Expand Up @@ -75,7 +76,7 @@ func addFileAndCommit(fileList map[string][]byte, problemsetName string) error {
if err != nil {
return err
}
Log.Println(commitID)
log.Println(commitID)
nextTip, err := gitRepo.LookupCommit(commitID)
if err != nil {
return err
Expand Down Expand Up @@ -109,135 +110,76 @@ func gitPush() error {
}
return nil
}
func runUpdate() {
for _, p := range P {
pName := try(p.Lookup("Name")).(func() string)()
var fileList FileList
var err error = nil
func() {
defer func() {
if t := recover(); t != nil {
err = t.(error)
}
}()
fileList, err = try(p.Lookup("Update")).(func(int) (FileList, error))(200)
}()
if err != nil {
Log.Printf(`call "Update" error in plugin %s: %v\n`, pName, err)
continue
}
err = addFileAndCommit(fileList, pName)
if err != nil {
Log.Println("git err:", err)
currentBranch, err := gitRepo.Head()
if err != nil {
Log.Panicln("git error:", err)
}
currentTip, err := gitRepo.LookupCommit(currentBranch.Target())
if err != nil {
Log.Panicln("git error:", err)
}
err = gitRepo.ResetToCommit(currentTip, git.ResetHard, &git.CheckoutOpts{})
if err != nil {
Log.Panicln("git error:", err)
}
} else {
err = gitPush()
if err != nil {
Log.Println("git push error:", err)
}
}
Log.Println("Updated " + pName)
}
}

var Log *log.Logger
// server is the receiver type for the gRPC API handlers defined in this
// file (Register, Deregister and Update). It has no fields; an instance is
// handed to rpc.RegisterAPIServer in main.
type server struct{}

func initLog() {
logFile, err := os.OpenFile("log.txt", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0777)
if err != nil {
fmt.Printf("open file error=%s\r\n", err.Error())
os.Exit(-1)
}
// Plugin is the record the main service keeps for one registered crawler
// component. Both fields are copied verbatim from the Register request:
// id is the key under which the plugin is stored, name is the name the
// plugin reported for itself.
type Plugin struct {
	id, name string
}

writers := []io.Writer{
logFile,
os.Stdout,
}
// plu holds the registered plugins, keyed by the id supplied in the
// Register request. It is nil until main allocates it with make.
// NOTE(review): Register writes to this map without any locking; confirm
// whether RPC handlers can run concurrently and guard it if so.
var plu map[string]*Plugin

fileAndStdoutWriter := io.MultiWriter(writers...)
Log = log.New(fileAndStdoutWriter, "", log.Ldate|log.Ltime)
// Register handles a plugin's registration call. It logs the id and name
// carried by the request, stores a new Plugin record in plu under that id
// (replacing any record already stored under the same id), and replies with
// DebugMode set to true. The returned error is always nil.
func (s *server) Register(c context.Context, req *rpc.RegisterRequest) (*rpc.RegisterReply, error) {
	id, name := req.Id, req.Name
	log.Println(id, name)

	entry := new(Plugin)
	entry.id = id
	entry.name = name
	plu[id] = entry

	reply := &rpc.RegisterReply{DebugMode: true}
	return reply, nil
}
func main() {
initLog()
err := filepath.Walk("plugin", func(path string, info os.FileInfo, err error) error {
// 遍历目录查找插件
if info.IsDir() {
return nil
}
p, err := plugin.Open(path)
if err != nil {
return nil
}
// 插件接口检查
f, err := p.Lookup("Name")
if err != nil {
Log.Panicf(`Lookup "Name" in plugin %s error`, path)
}
if _, ok := f.(func() string); !ok {
Log.Panicf(`Check "Name" in plugin %s error`, path)
}
f, err = p.Lookup("Start")

// Deregister handles a plugin's deregistration call. It is currently a
// no-op: the request is ignored, nothing is removed from plu, and an empty
// message is returned with a nil error.
func (s *server) Deregister(c context.Context, req *rpc.DeregisterRequest) (*empty.Empty, error) {
	reply := new(empty.Empty)
	return reply, nil
}

func (s *server) Update(c context.Context, req *rpc.UpdateRequest) (*rpc.UpdateReply, error) {
err := addFileAndCommit(req.File, req.Id)
if err != nil {
log.Println("git error:", err)
currentBranch, err := gitRepo.Head()
if err != nil {
Log.Panicf(`Lookup "Start" in plugin %s error`, path)
log.Panicln("git error:", err)
}
if _, ok := f.(func(*log.Logger) error); !ok {
Log.Panicf(`Check "Start" in plugin %s error`, path)
}
f, err = p.Lookup("Update")
currentTip, err := gitRepo.LookupCommit(currentBranch.Target())
if err != nil {
Log.Panicf(`Lookup "Update" in plugin %s error`, path)
}
if _, ok := f.(func(int) (FileList, error)); !ok {
Log.Panicf(`Check "Update" in plugin %s error`, path)
log.Panicln("git error:", err)
}
f, err = p.Lookup("Stop")
err = gitRepo.ResetToCommit(currentTip, git.ResetHard, &git.CheckoutOpts{})
if err != nil {
Log.Panicf(`Lookup "Stop" in plugin %s error`, path)
log.Panicln("git error:", err)
}
if _, ok := f.(func()); !ok {
Log.Panicf(`Check "Stop" in plugin %s error`, path)
}
Log.Printf("open plguin %s succeed", path)
P = append(P, p)
return nil
})
if err != nil {
Log.Panic(err)
}
Log.Println("插件载入完成")
for _, p := range P {
err := try(p.Lookup("Start")).(func(*log.Logger) error)(Log)
return &rpc.UpdateReply{Ok: false}, nil
} else {
err = gitPush()
if err != nil {
Log.Panicf(`call "Start" error in plugin %s: %v\n`, try(p.Lookup("Name")).(func() string)(), err)
log.Println("git push error:", err)
return &rpc.UpdateReply{Ok: false}, nil
}
}
log.Println("插件启动完成")
return &rpc.UpdateReply{Ok: true}, nil
}

func main() {
var err error
gitRepo, err = git.OpenRepository("../source")
if err != nil {
Log.Panicln(err)
log.Panicln(err)
}
b, err := ioutil.ReadFile("config/sshkey.json")
if err != nil {
Log.Panicln(err)
log.Panicln(err)
}
err = json.Unmarshal(b, &sshkey)
if err != nil {
Log.Panicln(err)
log.Panicln(err)
}
plu = make(map[string]*Plugin)
lis, err := net.Listen("tcp", ":27381")
if err != nil {
log.Fatalf("failed to listen: %v", err)
}
s := grpc.NewServer()
rpc.RegisterAPIServer(s, &server{})
reflection.Register(s)
if err := s.Serve(lis); err != nil {
log.Fatalf("failed to serve: %v", err)
}
runUpdate()
c := cron.New()
_ = c.AddFunc("@midnight", runUpdate)
c.Start()
select {}
}
Loading

0 comments on commit d062908

Please sign in to comment.