shad-go/distbuild/pkg/worker/worker.go

132 lines
2.9 KiB
Go
Raw Normal View History

2020-03-10 12:08:59 +00:00
package worker
import (
2020-03-14 10:24:44 +00:00
"bytes"
2020-03-10 12:08:59 +00:00
"context"
2020-03-14 10:24:44 +00:00
"encoding/json"
"fmt"
"io/ioutil"
2020-03-11 22:46:45 +00:00
"net/http"
2020-03-10 12:08:59 +00:00
"sync"
2020-03-11 22:46:45 +00:00
"go.uber.org/zap"
2020-03-10 12:08:59 +00:00
"gitlab.com/slon/shad-go/distbuild/pkg/artifact"
"gitlab.com/slon/shad-go/distbuild/pkg/build"
"gitlab.com/slon/shad-go/distbuild/pkg/filecache"
2020-03-14 10:24:44 +00:00
"gitlab.com/slon/shad-go/distbuild/pkg/proto"
2020-03-10 12:08:59 +00:00
)
type Worker struct {
2020-03-28 21:34:09 +00:00
id proto.WorkerID
2020-03-11 22:46:45 +00:00
coordinatorEndpoint string
log *zap.Logger
fileCache *filecache.Cache
artifacts *artifact.Cache
2020-03-10 12:08:59 +00:00
2020-03-11 22:46:45 +00:00
mux *http.ServeMux
2020-03-10 12:08:59 +00:00
mu sync.Mutex
newArtifacts []build.ID
newSources []build.ID
2020-03-23 20:58:08 +00:00
finishedJobs []proto.JobResult
2020-03-10 12:08:59 +00:00
}
2020-03-11 22:46:45 +00:00
func New(
2020-03-28 21:34:09 +00:00
workerID proto.WorkerID,
2020-03-11 22:46:45 +00:00
coordinatorEndpoint string,
log *zap.Logger,
fileCache *filecache.Cache,
artifacts *artifact.Cache,
) *Worker {
return &Worker{
2020-03-28 21:34:09 +00:00
id: workerID,
2020-03-11 22:46:45 +00:00
coordinatorEndpoint: coordinatorEndpoint,
log: log,
fileCache: fileCache,
artifacts: artifacts,
mux: http.NewServeMux(),
}
}
// ServeHTTP implements http.Handler by delegating every request to the
// worker's internal ServeMux.
func (w *Worker) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
	var router http.Handler = w.mux
	router.ServeHTTP(rw, r)
}
2020-03-10 12:08:59 +00:00
func (w *Worker) recover() error {
2020-03-11 22:46:45 +00:00
return w.artifacts.Range(func(file build.ID) error {
2020-03-10 12:08:59 +00:00
w.newArtifacts = append(w.newArtifacts, file)
return nil
})
}
2020-03-28 21:34:09 +00:00
func (w *Worker) sendHeartbeat(ctx context.Context, req *proto.HeartbeatRequest) (*proto.HeartbeatResponse, error) {
2020-03-14 10:24:44 +00:00
reqJS, err := json.Marshal(req)
if err != nil {
return nil, err
}
2020-03-28 21:34:09 +00:00
httpReq, err := http.NewRequestWithContext(ctx, "POST", w.coordinatorEndpoint+"/heartbeat", bytes.NewBuffer(reqJS))
2020-03-14 10:24:44 +00:00
if err != nil {
return nil, err
}
httpRsp, err := http.DefaultClient.Do(httpReq)
if err != nil {
return nil, err
}
if httpRsp.StatusCode != http.StatusOK {
errorString, _ := ioutil.ReadAll(httpRsp.Body)
return nil, fmt.Errorf("heartbeat failed: %s", errorString)
}
var rsp proto.HeartbeatResponse
if err := json.NewDecoder(httpRsp.Body).Decode(&rsp); err != nil {
return nil, err
}
return &rsp, nil
}
2020-03-10 12:08:59 +00:00
func (w *Worker) Run(ctx context.Context) error {
if err := w.recover(); err != nil {
return err
}
for {
2020-03-14 10:24:44 +00:00
w.log.Debug("sending heartbeat request")
2020-03-28 21:34:09 +00:00
rsp, err := w.sendHeartbeat(ctx, w.buildHeartbeat())
2020-03-14 10:24:44 +00:00
if err != nil {
if ctx.Err() != nil {
return ctx.Err()
}
w.log.DPanic("heartbeat failed", zap.Error(err))
continue
}
w.log.Debug("received heartbeat response",
zap.Int("num_jobs", len(rsp.JobsToRun)))
2020-03-28 13:54:43 +00:00
for _, spec := range rsp.JobsToRun {
2020-03-28 14:35:01 +00:00
spec := spec
2020-03-28 21:34:09 +00:00
w.log.Debug("running job", zap.String("job_id", spec.Job.ID.String()))
2020-03-28 13:54:43 +00:00
result, err := w.runJob(ctx, &spec)
if err != nil {
2020-03-28 14:22:24 +00:00
errStr := fmt.Sprintf("job %s failed: %v", spec.Job.ID, err)
2020-03-28 21:34:09 +00:00
w.log.Debug("job failed", zap.String("job_id", spec.Job.ID.String()), zap.Error(err))
2020-03-28 13:54:43 +00:00
w.jobFinished(&proto.JobResult{ID: spec.Job.ID, Error: &errStr})
continue
2020-03-14 10:24:44 +00:00
}
2020-03-28 21:34:09 +00:00
w.log.Debug("job finished", zap.String("job_id", spec.Job.ID.String()))
2020-03-28 13:54:43 +00:00
w.jobFinished(result)
2020-03-14 10:24:44 +00:00
}
2020-03-10 12:08:59 +00:00
}
}