diff --git a/README.md b/README.md index 72cfb35..82658e7 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ ### **Vault Side Kick** ------ + **Summary:** -> Vault Sidekick is a add-on container which can be used as a generic entry-point for interacting with Hashicorp [Vault](https://vaultproject.io) service, retrieving secrets +Vault Sidekick is an add-on container which can be used as a generic entry-point for interacting with Hashicorp [Vault](https://vaultproject.io) service, retrieving secrets (both static and dynamic) and PKI certs. The sidekick will take care of renewal's and extension of leases for you and renew the credentials in the specified format for you. **Usage:** @@ -16,7 +16,6 @@ Usage of build/vault-sidekick: -log_dir="": If non-empty, write log files in this directory -logtostderr=false: log to standard error instead of files -output="/etc/secrets": the full path to write the protected resources (VAULT_OUTPUT if available) - -renew=true: whether or not to renew secrets from vault -stderrthreshold=0: logs at or above this threshold go to stderr -token="": the token used to authenticate to teh vault service (VAULT_TOKEN if available) -tokenfile="": the full path to file containing the vault token used for authentication (VAULT_TOKEN_FILE if available) @@ -54,6 +53,25 @@ The above say's - Apply the IAM policy, renew the policy when required and file the API tokens to .s3_creds in the /etc/secrets directory - Read the template at /etc/templates/db.tmpl, produce the content from Vault and write to /etc/credentials file +**Secret Renewals** + +The default behaviour of vault-sidekick is **not** to renew a lease, but to retrieve a new secret and allow the previous one to +expire, in order to ensure the rotation of secrets. If you don't want this behaviour on a resource you can override it using resource options. 
For example, +if you're using the mysql dynamic secrets and want to renew the secret rather than replace it + +```shell +[jest@starfury vault-sidekick]$ build/vault-sidekick -cn=mysql:my_database:fmt=yaml,rn=true +or an iam policy renewed every hour +[jest@starfury vault-sidekick]$ build/vault-sidekick -cn=aws:aws_policy_path:fmt=yaml,rn=true,up=1h + +``` + +Or you want to rotate the secret every **1h** and **revoke** the previous one + +```shell +[jest@starfury vault-sidekick]$ build/vault-sidekick -cn=aws:my_s3_bucket:fmt=yaml,up=1h,rv=true +``` + **Output Formatting** The following output formats are supported: json, yaml, ini, txt @@ -87,6 +105,8 @@ The default format is 'txt' which has the following behavour. If the number of k **Resource Options** - **fn**: (filaname) by default all file are relative to the output directory specified and will have the name NAME.RESOURCE; the fn options allows you to switch names and paths to write the files -- **rn**: (renewal) allow you to set the renewal time on a resource, but default we take the lease time from the secret and use that, the rn feature allows use to override it -- **fmt**: (format) allows you to specify the output format of the resource / secret. -- **cn**: (comman name) is used in conjunction with the PKI resource. The common argument is passed as an argument when make a request to issue the certs. \ No newline at end of file +- **up**: (update) override the lease time of this resource and get/renew a secret on the specified duration, e.g. 1m, 2h, 5m10s +- **rn**: (renewal) override the default behaviour on this resource and renew the lease when it is close to expiration, e.g. true, TRUE +- **rv**: (revoke) revoke the old lease when you retrieve a new one, e.g. true, TRUE (the default is to allow the old lease to expire naturally) +- **fmt**: (format) allows you to specify the output format of the resource / secret, e.g. json, yaml, ini, txt +- **cn**: (common name) is used in conjunction with the PKI resource. 
The common argument is passed as an argument when make a request to issue the certs. \ No newline at end of file diff --git a/config.go b/config.go index a2b91c2..d445acf 100644 --- a/config.go +++ b/config.go @@ -35,8 +35,6 @@ type config struct { vaultTokenFile string // the place to write the resources secretsDirectory string - // whether or not to renew the leases on our resources - renewResources bool // whether of not to remove the token post connection deleteToken bool // switch on dry run @@ -58,7 +56,6 @@ func init() { flag.StringVar(&options.vaultTokenFile, "tokenfile", getEnv("VAULT_TOKEN_FILE", ""), "the full path to file containing the vault token used for authentication (VAULT_TOKEN_FILE if available)") flag.StringVar(&options.secretsDirectory, "output", getEnv("VAULT_OUTPUT", "/etc/secrets"), "the full path to write the protected resources (VAULT_OUTPUT if available)") flag.BoolVar(&options.deleteToken, "delete-token", false, "once the we have connected to vault, delete the token file from disk") - flag.BoolVar(&options.renewResources, "renew", true, "whether or not to renew secrets from vault") flag.BoolVar(&options.dryRun, "dry-run", false, "perform a dry run, printing the content to screen") flag.DurationVar(&options.statsInterval, "stats", time.Duration(5) * time.Minute, "the interval to produce statistics on the accessed resources") flag.Var(options.resources, "cn", "a resource to retrieve and monitor from vault (e.g. 
pki:name:cert.name, secret:db_password, aws:s3_backup)") diff --git a/main.go b/main.go index 83a528c..3e85387 100644 --- a/main.go +++ b/main.go @@ -20,7 +20,6 @@ import ( "bytes" "encoding/json" "fmt" - "io/ioutil" "os" "os/signal" "strings" @@ -84,10 +83,9 @@ func processResource(rn *vaultResource, data map[string]interface{}) error { } // step: get the output format - contentFormat := rn.getFormat() - glog.V(3).Infof("saving resource: %s, format: %s", rn, contentFormat) + glog.V(3).Infof("saving resource: %s, format: %s", rn, rn.format) - switch contentFormat { + switch rn.format { case "yaml": // marshall the content to yaml if content, err = yaml.Marshal(data); err != nil { @@ -142,7 +140,13 @@ func writeFile(filename string, content []byte) error { return nil } - if err := ioutil.WriteFile(filename, content, 0440); err != nil { + file, err := os.Create(filename) + if err != nil { + return err + } + defer file.Close() + + if _, err := file.Write(content); err != nil { return err } diff --git a/tests/demo-iam-policy.json b/tests/demo-iam-policy.json new file mode 100644 index 0000000..dc452d0 --- /dev/null +++ b/tests/demo-iam-policy.json @@ -0,0 +1,8 @@ +{ + "Version": "2012-10-17", + "Statement": { + "Effect": "Allow", + "Action": "s3:Get*", + "Resource": "arn:aws:s3:::dev-ceph-backups-eu-west-1" + } +} diff --git a/utils.go b/utils.go index 57e9935..e7f38f2 100644 --- a/utils.go +++ b/utils.go @@ -40,6 +40,11 @@ func showUsage(message string, args ... interface{}) { os.Exit(0) } +// randomWait ... wait for a random amout of time +func randomWait(min, max int ) <-chan time.Time { + return time.After(time.Duration(getRandomWithin(min,max)) * time.Second) +} + // getKeys ... 
retrieve a list of keys from the map func getKeys(data map[string]interface{}) []string { var list []string diff --git a/vault.go b/vault.go index f4ae53e..e752b03 100644 --- a/vault.go +++ b/vault.go @@ -35,9 +35,7 @@ type vaultService struct { // the vault config config *api.Config // a channel to inform of a new resource to processor - resourceCh chan *watchedResource - // the statistics channel - statCh *time.Ticker + resourceChannel chan *watchedResource } type vaultResourceEvent struct { @@ -67,20 +65,11 @@ type watchedResource struct { secret *api.Secret } -// updateSecret ... sets the secret for the watched resource and updates the various counters / timers -func (r *watchedResource) updateSecret(secret *api.Secret) { - r.secret = secret - r.lastUpdated = time.Now() - r.leaseExpireTime = r.lastUpdated.Add(time.Duration(secret.LeaseDuration)) - glog.V(10).Infof("updating secret on resource: %s, leaseId: %s, lease: %s, expiration: %s", - r.resource, r.secret.LeaseID, r.secret.LeaseID, r.leaseExpireTime) -} - // notifyOnRenewal ... 
creates a trigger and notifies when a resource is up for renewal func (r *watchedResource) notifyOnRenewal(ch chan *watchedResource) { go func() { // step: check if the resource has a pre-configured renewal time - r.renewalTime = r.resource.leaseTime() + r.renewalTime = r.resource.update // step: if the answer is no, we set the notification between 80-95% of the lease time of the secret if r.renewalTime <= 0 { @@ -89,6 +78,7 @@ func (r *watchedResource) notifyOnRenewal(ch chan *watchedResource) { int(float64(r.secret.LeaseDuration) * 0.8), int(float64(r.secret.LeaseDuration) * 0.95))) * time.Second } + glog.V(3).Infof("setting a renewal notification on resource: %s, time: %s", r.resource, r.renewalTime) // step: wait for the duration <- time.After(r.renewalTime) @@ -110,8 +100,7 @@ func newVaultService(url, token string) (*vaultService, error) { service.config.Address = url // step: create the service processor channels - service.resourceCh = make(chan *watchedResource, 20) - service.statCh = time.NewTicker(options.statsInterval) + service.resourceChannel = make(chan *watchedResource, 20) // step: create the actual client service.client, err = api.NewClient(service.config) @@ -135,7 +124,10 @@ func (r vaultService) vaultServiceProcessor() { // a list of resource being watched items := make([]*watchedResource, 0) // the channel to receive renewal notifications on - renewing := make(chan *watchedResource, 5) + renewChannel:= make(chan *watchedResource, 10) + retrieveChannel := make(chan *watchedResource, 10) + revokeChannel := make(chan string, 10) + statsChannel := time.NewTicker(options.statsInterval) for { select { @@ -143,65 +135,102 @@ func (r vaultService) vaultServiceProcessor() { // - we retrieve the resource from vault // - if we error attempting to retrieve the secret, we background and reschedule an attempt to add it // - if ok, we grab the lease it and lease time, we setup a notification on renewal - case x := <-r.resourceCh: - glog.V(3).Infof("adding a 
resource into the service processor, resource: %s", x.resource) + case x := <-r.resourceChannel: + glog.Infof("adding a resource into the service processor, resource: %s", x.resource) + // step: add to the list of resources + items = append(items, x) + // step: push into the retrieval channel + retrieveChannel <- x + + case x := <- retrieveChannel: + // step: save the current lease if we have one + leaseId := "" + if x.secret != nil && x.secret.LeaseID != "" { + leaseId = x.secret.LeaseID + glog.V(10).Infof("resource: %s has a previous lease: %s", x.resource, leaseId) + } // step: retrieve the resource from vault err := r.get(x) if err != nil { glog.Errorf("failed to retrieve the resource: %s from vault, error: %s", x.resource, err) // reschedule the attempt for later - go func(x *watchedResource) { - <- time.After(time.Duration(getRandomWithin(2,10)) * time.Second) - r.resourceCh <- x - }(x) + r.reschedule(x, retrieveChannel, 3, 10) + break } - // step: setup a timer for renewal - x.notifyOnRenewal(renewing) + glog.Infof("succesfully retrieved resournce: %s, leaseID: %s", x.resource, x.secret.LeaseID) - // step: add to the list of resources - items = append(items, x) + // step: if we had a previous lease and the option is to revoke, lets throw into the revoke channel + if leaseId != "" && x.resource.revoked { + revokeChannel <- leaseId + } + + // step: setup a timer for renewal + x.notifyOnRenewal(renewChannel) // step: update the upstream consumers r.upstream(x, x.secret) // A watched resource is coming up for renewal - // - we attempt to grab the resource from vault + // - we attempt to renew the resource from vault // - if we encounter an error, we reschedule the attempt for the future // - if we're ok, we update the watchedResource and we send a notification of the change upstream - case x := <-renewing: - glog.V(3).Infof("resource: %s, lease: %s coming up for renewal, attempting to renew now", x.resource, x.secret.LeaseID) - // step: we attempt to renew the 
lease on a resource and if not successfully we reschedule - // a renewal notification for the future - // - we also have to handle the scenario where the lease has expired + case x := <-renewChannel: + + glog.V(4).Infof("resource: %s, lease: %s up for renewal, renewable: %t, revoked: %t", x.resource, + x.secret.LeaseID, x.resource.renewable, x.resource.revoked) // step: we need to check if the lease has expired? if time.Now().Before(x.leaseExpireTime) { glog.V(3).Infof("the lease on resource: %s has expired, we need to get a new lease", x.resource) - x.secret.Renewable = false + // push into the retrieval channel and break + retrieveChannel <- x + break } - err := r.renew(x) - if err != nil { - glog.Errorf("failed to renew the resounce: %s for renewal, error: %s", x.resource, err) - // reschedule the attempt for later - go func(x *watchedResource) { - <- time.After(time.Duration(getRandomWithin(3,20)) * time.Second) - renewing <- x - }(x) + // step: are we renewing the resource? + if x.resource.renewable { + // step: is the underlining resource even renewable? 
- otherwise we can just grab a new lease + if !x.secret.Renewable { + glog.V(10).Infof("the resource: %s is not renewable, retrieving a new lease instead", x.resource) + retrieveChannel <- x + break + } + + // step: lets renew the resource + err := r.renew(x) + if err != nil { + glog.Errorf("failed to renew the resounce: %s for renewal, error: %s", x.resource, err) + // reschedule the attempt for later + r.reschedule(x, renewChannel, 3, 10) + break + } + } + + // step: the option for this resource is not to renew the secret but regenerate a new secret + if !x.resource.renewable { + glog.V(4).Infof("resource: %s flagged as not renewable, shifting to regenerating the resource", x.resource) + retrieveChannel <- x break } // step: setup a timer for renewal - x.notifyOnRenewal(renewing) + x.notifyOnRenewal(renewChannel) // step: update any listener upstream r.upstream(x, x.secret) + case lease := <-revokeChannel: + + err := r.revoke(lease) + if err != nil { + glog.Errorf("failed to revoke the lease: %s, error: %s", lease, err) + } + // The statistics timer has gone off; we iterate the watched items and - case <-r.statCh.C: + case <-statsChannel.C: glog.V(3).Infof("stats: %d resources being watched", len(items)) for _, item := range items { glog.V(3).Infof("resourse: %s, lease id: %s, renewal in: %s seconds, expiration: %s", @@ -212,6 +241,14 @@ func (r vaultService) vaultServiceProcessor() { }() } +func (r vaultService) reschedule(rn *watchedResource, ch chan *watchedResource, min, max int) { + go func(x *watchedResource) { + glog.V(3).Infof("rescheduling the resource: %s, channel: %s", rn.resource, ch) + <-randomWait(min, max) + ch <- x + }(rn) +} + func (r vaultService) upstream(item *watchedResource, s *api.Secret) { // step: chunk this into a go-routine not to block us go func() { @@ -226,23 +263,39 @@ func (r vaultService) upstream(item *watchedResource, s *api.Secret) { // renew ... 
attempts to renew the lease on a resource // rn : the resource we wish to renew the lease on func (r vaultService) renew(rn *watchedResource) error { - - // step: can this secret be renewed - otherwise we can just grab a new lease + // step: extend the lease on a resource + glog.V(4).Infof("attempting to renew the lease: %s on resource: %s", rn.secret.LeaseID, rn.resource) + // step: check the resource is renewable if !rn.secret.Renewable { - glog.V(4).Infof("the resource: %s is not renewable, retrieving a new lease instead", rn.resource) - return r.get(rn) + return fmt.Errorf("the resource: %s is not renewable", rn.resource) } - // step: extend the lease on a resource - glog.V(3).Infof("attempting to renew the lease: %s on resource: %s", rn.secret.LeaseID, rn.resource) secret, err := r.client.Sys().Renew(rn.secret.LeaseID, 0) if err != nil { - glog.V(4).Infof("unable to renew the lease on resource: %s", rn.resource) + glog.Errorf("unable to renew the lease on resource: %s", rn.resource) return err } - // step: update the secret - rn.updateSecret(secret) + // step: update the resource + rn.lastUpdated = time.Now() + rn.leaseExpireTime = rn.lastUpdated.Add(time.Duration(secret.LeaseDuration)) + + glog.V(3).Infof("renewed resource: %s, leaseId: %s, lease_time: %s, expiration: %s", + rn.resource, rn.secret.LeaseID, rn.secret.LeaseID, rn.leaseExpireTime) + + return nil +} + +// revoke ... attempt to revoke the lease of a resource +// lease : the lease lease which was given when you got it +func (r vaultService) revoke(lease string) error { + glog.V(3).Infof("attemping to revoking the lease: %s", lease) + + err := r.client.Sys().Revoke(lease) + if err != nil { + return err + } + glog.V(3).Infof("successfully revoked the leaseId: %s", lease) return nil } @@ -250,8 +303,8 @@ func (r vaultService) renew(rn *watchedResource) error { // get ... 
retrieve a secret from the vault func (r vaultService) get(rn *watchedResource) (err error) { var secret *api.Secret - glog.V(5).Infof("attempting to retrieve the resource: %s from vault", rn.resource) + switch rn.resource.resource { case "pki": secret, err = r.client.Logical().Write(fmt.Sprintf("%s/issue/%s", rn.resource.resource, rn.resource.name), @@ -265,12 +318,25 @@ func (r vaultService) get(rn *watchedResource) (err error) { case "secret": secret, err = r.client.Logical().Read(fmt.Sprintf("%s/%s", rn.resource.resource, rn.resource.name)) } - if secret == nil && err == nil { - return fmt.Errorf("does not exist") + // step: return on error + if err != nil { + return err + } + if secret == nil && err != nil { + return fmt.Errorf("the resource does not exist") + } + + if secret == nil { + return fmt.Errorf("unable to retrieve the secret") } // step: update the watched resource - rn.updateSecret(secret) + rn.lastUpdated = time.Now() + rn.secret = secret + rn.leaseExpireTime = rn.lastUpdated.Add(time.Duration(secret.LeaseDuration)) + + glog.V(3).Infof("retrieved resource: %s, leaseId: %s, lease_time: %s", + rn.resource, rn.secret.LeaseID, time.Duration(rn.secret.LeaseDuration) * time.Second) return err } @@ -278,10 +344,10 @@ func (r vaultService) get(rn *watchedResource) (err error) { // watch ... 
add a watch on a resource and inform, renew which required and inform us when // the resource is ready func (r *vaultService) watch(rn *vaultResource, ch vaultEventsChannel) { - glog.V(10).Infof("adding the resource: %s, listener: %v to service processor", rn, ch) - r.resourceCh <- &watchedResource{ + glog.V(6).Infof("adding the resource: %s, listener: %v to service processor", rn, ch) + + r.resourceChannel <- &watchedResource{ resource: rn, listener: ch, } - } diff --git a/vault_resource.go b/vault_resource.go index 7950244..8e87764 100644 --- a/vault_resource.go +++ b/vault_resource.go @@ -20,6 +20,8 @@ import ( "fmt" "regexp" "time" + "strconv" +"github.com/golang/glog" ) const ( @@ -32,7 +34,13 @@ const ( // OptionTemplatePath ... the full path to a template OptionsTemplatePath = "tpl" // OptionRenew ... a duration to renew the resource - OptionRenew = "rn" + OptionRenewal = "rn" + // OptionRevoke ... revoke an old lease when retrieving a new one + OptionRevoke = "rv" + // OptionUpdate ... override the lease of the resource + OptionUpdate = "up" + + DefaultRenewable = "false" ) var ( @@ -46,11 +54,13 @@ var ( "mysql": true, "tpl": true, } - ) -func newVaultResource() *vaultResource { +func defaultVaultResource() *vaultResource { return &vaultResource{ + format: "yaml", + renewable: false, + revoked: false, options: make(map[string]string, 0), } } @@ -61,22 +71,20 @@ type vaultResource struct { resource string // the name of the resource name string + // the format of the resource + format string + // whether the resource should be renewed? + renewable bool + // whether the resource should be revoked? + revoked bool + // the lease duration + update time.Duration // additional options to the resource options map[string]string } -// leaseTime ... 
get the renew time otherwise return 0 -func (r vaultResource) leaseTime() time.Duration { - if _, found := r.options[OptionRenew]; found { - duration, _ := time.ParseDuration(r.options[OptionRenew]) - return duration - } - - return time.Duration(0) -} - // isValid ... checks to see if the resource is valid -func (r vaultResource) isValid() error { +func (r *vaultResource) isValid() error { // step: check the resource type if _, found := validResources[r.resource]; !found { return fmt.Errorf("unsupported resource type: %s", r.resource) @@ -84,7 +92,7 @@ func (r vaultResource) isValid() error { // step: check the options if err := r.isValidOptions(); err != nil { - return fmt.Errorf("invalid resource options: %s, %s", r.options, err) + return fmt.Errorf("invalid resource options, %s", err) } // step: check is have all the required options to this resource type @@ -95,17 +103,8 @@ func (r vaultResource) isValid() error { return nil } -// getFormat ... get the format of the resource -func (r vaultResource) getFormat() string { - if format, found := r.options[OptionFormat]; found { - return format - } - return "txt" -} - - // isValidResource ... validate the resource meets the requirements -func (r vaultResource) isValidResource() error { +func (r *vaultResource) isValidResource() error { switch r.resource { case "pki": if _, found := r.options[OptionCommonName]; !found { @@ -120,8 +119,8 @@ func (r vaultResource) isValidResource() error { return nil } -// isValidOptions ... iterates through the options and check they are ok -func (r vaultResource) isValidOptions() error { +// isValidOptions ... 
iterates through the options, converts the options and so forth +func (r *vaultResource) isValidOptions() error { // check the filename directive for opt, val := range r.options { switch opt { @@ -129,12 +128,33 @@ func (r vaultResource) isValidOptions() error { if matched := resourceFormatRegex.MatchString(r.options[OptionFormat]); !matched { return fmt.Errorf("unsupported output format: %s", r.options[OptionFormat]) } - case OptionRenew: - if _, err := time.ParseDuration(val); err != nil { - return fmt.Errorf("the renew option: %s is not value", val) + glog.V(20).Infof("setting the format: %s on resource: %s", val, r) + r.format = val + case OptionUpdate: + duration, err := time.ParseDuration(val) + if err != nil { + return fmt.Errorf("the update option: %s is not value, should be a duration format", val) } + glog.V(20).Infof("setting the update time: %s on resource: %s", duration, r) + r.update = duration + case OptionRevoke: + choice, err := strconv.ParseBool(val) + if err != nil { + return fmt.Errorf("the revoke option: %s is invalid, should be a boolean", val) + } + glog.V(20).Infof("setting the revoked: %t on resource: %s", choice, r) + r.revoked = choice + case OptionRenewal: + choice, err := strconv.ParseBool(val) + if err != nil { + return fmt.Errorf("the renewal option: %s is invalid, should be a boolean", val) + } + glog.V(20).Infof("setting the renewable: %t on resource: %s", choice, r) + r.renewable = choice case OptionFilename: + // @TODO need to check it's valid filename / path case OptionCommonName: + // @TODO need to check it's a valid hostname case OptionsTemplatePath: if exists, _ := fileExists(val); !exists { return fmt.Errorf("the template file: %s does not exist", val) @@ -157,5 +177,5 @@ func (r vaultResource) filename() string { // String ... 
a string representation of the struct func (r vaultResource) String() string { - return fmt.Sprintf("%s/%s", r.resource, r.name) + return fmt.Sprintf("%s/%s (%s|%t|%t)", r.resource, r.name, r.update, r.renewable, r.revoked) } diff --git a/vault_resource_test.go b/vault_resource_test.go index e0ae7c6..e03795c 100644 --- a/vault_resource_test.go +++ b/vault_resource_test.go @@ -35,7 +35,7 @@ func TestResourceFilename(t *testing.T) { func TestIsValid(t *testing.T) { - resource := newVaultResource() + resource := defaultVaultResource() resource.name = "/test/name" resource.resource = "secret" diff --git a/vault_resources.go b/vault_resources.go index 897d2a7..3f6b140 100644 --- a/vault_resources.go +++ b/vault_resources.go @@ -39,7 +39,7 @@ func (r vaultResources) size() int { // Set ... implementation for the parser func (r *vaultResources) Set(value string) error { - rn := new(vaultResource) + rn := defaultVaultResource() // step: extract the resource type and name if matched := resourceRegex.MatchString(value); !matched {