一、几个存储的概念
1. pv
A PersistentVolume is a piece of storage in the cluster that has been provisioned by an administrator.
2. pvc
A PersistentVolumeClaims is a request for storage by a user.
3. storageclass
A storageclass provides a way for administrator to describe the classes of storage they offer.
pv 支持两种方式的 provision,静态和动态
3.1 static
A cluster administrator create a number of PVs. 管理员先创建一堆 PVs,然后提供给 PVC 绑定
3.2 Dynamic
Base StorageClasses, when none of the static PVs the administrator created matches a user’s PersistentVolumeClaim, the cluster may try to dynamically provision a volume specially for the PVC.
二、StorageClass 是怎么完成 Dynamic provison 的
# k get sc rbd -o yaml --export=true
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: rbd
selfLink: /apis/storage.k8s.io/v1/storageclasses/rbd
parameters:
adminId: ***
adminSecretName: ***
adminSecretNamespace: ***
imageFeatures: ***
imageFormat: "***"
monitors: ***
pool: ***
userId: ***
userSecretName: ***
userSecretNamespace: ***
provisioner: ceph.com/rbd
reclaimPolicy: Delete
我们来重点关注这个 provisioner
在代码 https://github.com/kubernetes-incubator/external-storage/blob/v5.1.0/ceph/rbd/pkg/provision/provision.go#L114
中我们可以看到
pv := &v1.PersistentVolume{
ObjectMeta: metav1.ObjectMeta{
Name: options.PVName,
Annotations: map[string]string{
provisionerIDAnn: p.identity,
},
},
Spec: v1.PersistentVolumeSpec{
PersistentVolumeReclaimPolicy: options.PersistentVolumeReclaimPolicy,
AccessModes: options.PVC.Spec.AccessModes,
MountOptions: options.MountOptions,
Capacity: v1.ResourceList{
v1.ResourceName(v1.ResourceStorage): resource.MustParse(fmt.Sprintf("%dMi", sizeMB)),
},
PersistentVolumeSource: v1.PersistentVolumeSource{
RBD: rbd,
},
},
}
我们的 provisioner 会根据请求自动创建一个 pv 然后返回
三、pv 的生命周期
- Pending 不可用
- Available 可用
- Bound 已绑定
- Released pvc 已经删除,要先回收才能再次使用
- Failed 不能正确回收,或者在 pvc 释放之后不能删除
四、三种PV的访问模式
- ReadWriteOnce:是最基本的方式,可读可写,但只支持被单个Pod挂载。
- ReadOnlyMany:可以以只读的方式被多个Pod挂载。
- ReadWriteMany:这种存储可以以读写的方式被多个Pod共享。
五、PersistentVolumeController 部分代码分析
// provisionClaimOperation provisions a volume. This method is running in
// standalone goroutine and already has all necessary locks.
func (ctrl *PersistentVolumeController) provisionClaimOperation(claimObj interface{}) {
// get pv
claim, ok := claimObj.(*v1.PersistentVolumeClaim)
if !ok {
glog.Errorf("Cannot convert provisionClaimOperation argument to claim, got %#v", claimObj)
return
}
// 从 pvc 的 annotation 或 spec 获取 storageclass 名称
claimClass := v1helper.GetPersistentVolumeClaimClass(claim)
glog.V(4).Infof("provisionClaimOperation [%s] started, class: %q", claimToClaimKey(claim), claimClass)
// 返回 volume 插件和 storageclass 实例
plugin, storageClass, err := ctrl.findProvisionablePlugin(claim)
if err != nil {
ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, events.ProvisioningFailed, err.Error())
glog.V(2).Infof("error finding provisioning plugin for claim %s: %v", claimToClaimKey(claim), err)
// The controller will retry provisioning the volume in every
// syncVolume() call.
return
}
// Add provisioner annotation so external provisioners know when to start
// 在注解中加入 storageclass
newClaim, err := ctrl.setClaimProvisioner(claim, storageClass)
if err != nil {
// Save failed, the controller will retry in the next sync
glog.V(2).Infof("error saving claim %s: %v", claimToClaimKey(claim), err)
return
}
claim = newClaim
if plugin == nil {
// findProvisionablePlugin returned no error nor plugin.
// This means that an unknown provisioner is requested. Report an event
// and wait for the external provisioner
msg := fmt.Sprintf("waiting for a volume to be created, either by external provisioner %q or manually created by system administrator", storageClass.Provisioner)
ctrl.eventRecorder.Event(claim, v1.EventTypeNormal, events.ExternalProvisioning, msg)
glog.V(3).Infof("provisioning claim %q: %s", claimToClaimKey(claim), msg)
return
}
// internal provisioning
// A previous doProvisionClaim may just have finished while we were waiting for
// the locks. Check that PV (with deterministic name) hasn't been provisioned
// yet.
// pvName = 'pvc' + pvc.UID
pvName := ctrl.getProvisionedVolumeNameForClaim(claim)
volume, err := ctrl.kubeClient.CoreV1().PersistentVolumes().Get(pvName, metav1.GetOptions{})
// pv 已经存在,返回
if err == nil && volume != nil {
// Volume has been already provisioned, nothing to do.
glog.V(4).Infof("provisionClaimOperation [%s]: volume already exists, skipping", claimToClaimKey(claim))
return
}
// Prepare a claimRef to the claim early (to fail before a volume is
// provisioned)
claimRef, err := ref.GetReference(scheme.Scheme, claim)
if err != nil {
glog.V(3).Infof("unexpected error getting claim reference: %v", err)
return
}
// Gather provisioning options
tags := make(map[string]string)
tags[CloudVolumeCreatedForClaimNamespaceTag] = claim.Namespace
tags[CloudVolumeCreatedForClaimNameTag] = claim.Name
tags[CloudVolumeCreatedForVolumeNameTag] = pvName
options := vol.VolumeOptions{
PersistentVolumeReclaimPolicy: *storageClass.ReclaimPolicy,
MountOptions: storageClass.MountOptions,
CloudTags: &tags,
ClusterName: ctrl.clusterName,
PVName: pvName,
PVC: claim,
Parameters: storageClass.Parameters,
}
// Refuse to provision if the plugin doesn't support mount options, creation
// of PV would be rejected by validation anyway
if !plugin.SupportsMountOption() && len(options.MountOptions) > 0 {
strerr := fmt.Sprintf("Mount options are not supported by the provisioner but StorageClass %q has mount options %v", storageClass.Name, options.MountOptions)
glog.V(2).Infof("Mount options are not supported by the provisioner but claim %q's StorageClass %q has mount options %v", claimToClaimKey(claim), storageClass.Name, options.MountOptions)
ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, events.ProvisioningFailed, strerr)
return
}
// Provision the volume
provisioner, err := plugin.NewProvisioner(options)
if err != nil {
strerr := fmt.Sprintf("Failed to create provisioner: %v", err)
glog.V(2).Infof("failed to create provisioner for claim %q with StorageClass %q: %v", claimToClaimKey(claim), storageClass.Name, err)
ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, events.ProvisioningFailed, strerr)
return
}
opComplete := util.OperationCompleteHook(plugin.GetPluginName(), "volume_provision")
// 还记得上面的 Provison 么?
// 继续参见 https://github.com/kubernetes-incubator/external-storage/blob/v5.1.0/ceph/rbd/pkg/provision/provision.go#L114
volume, err = provisioner.Provision()
opComplete(err)
if err != nil {
strerr := fmt.Sprintf("Failed to provision volume with StorageClass %q: %v", storageClass.Name, err)
glog.V(2).Infof("failed to provision volume for claim %q with StorageClass %q: %v", claimToClaimKey(claim), storageClass.Name, err)
ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, events.ProvisioningFailed, strerr)
return
}
glog.V(3).Infof("volume %q for claim %q created", volume.Name, claimToClaimKey(claim))
// Create Kubernetes PV object for the volume.
if volume.Name == "" {
volume.Name = pvName
}
// Bind it to the claim
// 修改 pv 的值,绑定到 pvc
volume.Spec.ClaimRef = claimRef
volume.Status.Phase = v1.VolumeBound
volume.Spec.StorageClassName = claimClass
// Add annBoundByController (used in deleting the volume)
metav1.SetMetaDataAnnotation(&volume.ObjectMeta, annBoundByController, "yes")
metav1.SetMetaDataAnnotation(&volume.ObjectMeta, annDynamicallyProvisioned, plugin.GetPluginName())
// Try to create the PV object several times
for i := 0; i < ctrl.createProvisionedPVRetryCount; i++ {
glog.V(4).Infof("provisionClaimOperation [%s]: trying to save volume %s", claimToClaimKey(claim), volume.Name)
var newVol *v1.PersistentVolume
if newVol, err = ctrl.kubeClient.CoreV1().PersistentVolumes().Create(volume); err == nil || apierrs.IsAlreadyExists(err) {
// Save succeeded.
if err != nil {
glog.V(3).Infof("volume %q for claim %q already exists, reusing", volume.Name, claimToClaimKey(claim))
err = nil
} else {
glog.V(3).Infof("volume %q for claim %q saved", volume.Name, claimToClaimKey(claim))
_, updateErr := ctrl.storeVolumeUpdate(newVol)
if updateErr != nil {
// We will get an "volume added" event soon, this is not a big error
glog.V(4).Infof("provisionClaimOperation [%s]: cannot update internal cache: %v", volume.Name, updateErr)
}
}
break
}
// Save failed, try again after a while.
glog.V(3).Infof("failed to save volume %q for claim %q: %v", volume.Name, claimToClaimKey(claim), err)
time.Sleep(ctrl.createProvisionedPVInterval)
}
if err != nil {
// Save failed. Now we have a storage asset outside of Kubernetes,
// but we don't have appropriate PV object for it.
// Emit some event here and try to delete the storage asset several
// times.
strerr := fmt.Sprintf("Error creating provisioned PV object for claim %s: %v. Deleting the volume.", claimToClaimKey(claim), err)
glog.V(3).Info(strerr)
ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, events.ProvisioningFailed, strerr)
var deleteErr error
var deleted bool
for i := 0; i < ctrl.createProvisionedPVRetryCount; i++ {
deleted, deleteErr = ctrl.doDeleteVolume(volume)
if deleteErr == nil && deleted {
// Delete succeeded
glog.V(4).Infof("provisionClaimOperation [%s]: cleaning volume %s succeeded", claimToClaimKey(claim), volume.Name)
break
}
if !deleted {
// This is unreachable code, the volume was provisioned by an
// internal plugin and therefore there MUST be an internal
// plugin that deletes it.
glog.Errorf("Error finding internal deleter for volume plugin %q", plugin.GetPluginName())
break
}
// Delete failed, try again after a while.
glog.V(3).Infof("failed to delete volume %q: %v", volume.Name, deleteErr)
time.Sleep(ctrl.createProvisionedPVInterval)
}
if deleteErr != nil {
// Delete failed several times. There is an orphaned volume and there
// is nothing we can do about it.
strerr := fmt.Sprintf("Error cleaning provisioned volume for claim %s: %v. Please delete manually.", claimToClaimKey(claim), deleteErr)
glog.V(2).Info(strerr)
ctrl.eventRecorder.Event(claim, v1.EventTypeWarning, events.ProvisioningCleanupFailed, strerr)
}
} else {
glog.V(2).Infof("volume %q provisioned for claim %q", volume.Name, claimToClaimKey(claim))
msg := fmt.Sprintf("Successfully provisioned volume %s using %s", volume.Name, plugin.GetPluginName())
ctrl.eventRecorder.Event(claim, v1.EventTypeNormal, events.ProvisioningSucceeded, msg)
}
}