Skip to content

Commit

Permalink
Add Helm chart for Airflow
Browse files Browse the repository at this point in the history
- add EditorConfig files
- airflow.all.yaml is split into separate chart templates
- Propagate server config from env variable if defined
- startup script can receive some config by ENV variable
  (name of rabbitmq/postgres server), at least everything
  derives from the same source
- username and password can inherit from env
- fix probable buggy fernet key definition, missing enum34
  with python 2 installation
- change format of template in airflow.cfg (airflow.cfg.in)
- use configmap to set default environment variables
- parameterize number of workers
- support url prefix for flower and web ui
  allow ingress in endpoint such as /airflow and /flower
- use python 3 in docker
- optional git sync to mirror a git repo for DAGs
- debian stretch (python >3.4)
- load example true/false and flexible scheduler run nums
  • Loading branch information
gsemet committed Oct 25, 2017
1 parent 9910834 commit 904f312
Show file tree
Hide file tree
Showing 16 changed files with 662 additions and 37 deletions.
19 changes: 19 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
root = true

[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.md]
trim_trailing_whitespace = false

[*.yaml]
indent_size = 2

[Makefile]
indent_style = tab
indent_size = 4
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*~
build
rootfs
35 changes: 19 additions & 16 deletions Dockerfile.template
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# BUILD: docker build --rm -t mumoshu/kube-airflow
# SOURCE: https://github.com/mumoshu/kube-airflow

FROM debian:jessie
FROM debian:stretch
MAINTAINER Yusuke KUOKA <ykuoka@gmail.com>

# Never prompts the user for choices on installation/configuration of packages
Expand All @@ -21,12 +21,12 @@ ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LC_CTYPE en_US.UTF-8
ENV LC_MESSAGES en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LC_ALL en_US.UTF-8

RUN set -ex \
&& buildDeps=' \
python-pip \
python-dev \
python3-pip \
python3-dev \
libkrb5-dev \
libsasl2-dev \
libxml2-dev \
Expand All @@ -46,20 +46,21 @@ RUN set -ex \
curl \
netcat \
locales \
&& apt-get install -yqq -t jessie-backports python-requests libpq-dev \
&& apt-get install -yqq -t jessie-backports libpq-dev git \
&& sed -i 's/^# en_US.UTF-8 UTF-8$/en_US.UTF-8 UTF-8/g' /etc/locale.gen \
&& locale-gen \
&& update-locale LANG=en_US.UTF-8 LC_ALL=en_US.UTF-8 \
&& useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow \
&& pip uninstall setuptools \
&& pip install setuptools==33.1.1 \
&& pip install pytz==2015.7 \
&& pip install cryptography \
&& pip install pyOpenSSL \
&& pip install ndg-httpsclient \
&& pip install pyasn1 \
&& pip install psycopg2 \
&& pip install airflow[celery,postgresql,hive]==$AIRFLOW_VERSION \
&& pip3 install --upgrade pip enum34 'setuptools!=36.0.0' \
&& pip3 install pytz==2015.7 \
&& pip3 install cryptography \
&& pip3 install requests \
&& pip3 install pyOpenSSL \
&& pip3 install ndg-httpsclient \
&& pip3 install pyasn1 \
&& pip3 install psycopg2 \
&& pip3 install airflow[celery,postgresql,hive] \
&& pip3 install click \
&& apt-get remove --purge -yqq $buildDeps libpq-dev \
&& apt-get clean \
&& rm -rf \
Expand All @@ -75,10 +76,12 @@ ENV KUBECTL_VERSION %%KUBECTL_VERSION%%
RUN curl -L -o /usr/local/bin/kubectl https://storage.googleapis.com/kubernetes-release/release/v${KUBECTL_VERSION}/bin/linux/amd64/kubectl && chmod +x /usr/local/bin/kubectl

COPY script/entrypoint.sh ${AIRFLOW_HOME}/entrypoint.sh
COPY config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg
COPY config/airflow.cfg.in ${AIRFLOW_HOME}/airflow.cfg.in
COPY script/git-sync ${AIRFLOW_HOME}/git-sync

RUN chown -R airflow: ${AIRFLOW_HOME} \
&& chmod +x ${AIRFLOW_HOME}/entrypoint.sh
&& chmod +x ${AIRFLOW_HOME}/entrypoint.sh \
&& chmod +x ${AIRFLOW_HOME}/git-sync

EXPOSE 8080 5555 8793

Expand Down
28 changes: 26 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,42 @@ ALIAS ?= $(REPOSITORY):$(AIRFLOW_VERSION)-$(KUBECTL_VERSION)
BUILD_ROOT ?= build/$(TAG)
DOCKERFILE ?= $(BUILD_ROOT)/Dockerfile
ROOTFS ?= $(BUILD_ROOT)/rootfs
AIRFLOW_CONF ?= $(BUILD_ROOT)/config/airflow.cfg
AIRFLOW_CONF ?= $(BUILD_ROOT)/config/airflow.cfg.in
ENTRYPOINT_SH ?= $(BUILD_ROOT)/script/entrypoint.sh
DOCKER_CACHE ?= docker-cache
SAVED_IMAGE ?= $(DOCKER_CACHE)/image-$(AIRFLOW_VERSION)-$(KUBECTL_VERSION).tar

NAMESPACE ?= airflow-dev
HELM_APPLICATION_NAME ?= airflow
HELM_CONFIG ?= config.yaml
CHART_LOCATION ?= ./airflow

.PHONY: build clean

clean:
rm -Rf build

# Helm lifecycle targets. None of them produces a file, so they are declared
# phony: otherwise a stray file or directory named e.g. `helm-install` would
# make the target look up to date and the recipe would be silently skipped.
.PHONY: helm-install helm-upgrade helm-ls helm-uninstall

# Refresh the local Helm repository index, then install the chart found at
# $(CHART_LOCATION) as release $(HELM_APPLICATION_NAME) into $(NAMESPACE),
# overriding chart defaults with the values file $(HELM_CONFIG).
helm-install:
	helm repo update
	helm install $(CHART_LOCATION) \
		--version=v0.1.0 \
		--name=$(HELM_APPLICATION_NAME) \
		--namespace=$(NAMESPACE) \
		--debug \
		-f $(HELM_CONFIG)

# Upgrade the existing release $(HELM_APPLICATION_NAME) in place from the
# chart at $(CHART_LOCATION), using the values file $(HELM_CONFIG).
helm-upgrade:
	helm upgrade -f $(HELM_CONFIG) \
		--debug \
		$(HELM_APPLICATION_NAME) \
		$(CHART_LOCATION)

# List releases matching $(HELM_APPLICATION_NAME); `--all` asks Helm to show
# every release regardless of its status.
helm-ls:
	helm ls --all $(HELM_APPLICATION_NAME)

# Delete the release $(HELM_APPLICATION_NAME); `--purge` is passed so the
# release record is removed as well.
helm-uninstall:
	helm del --purge $(HELM_APPLICATION_NAME)

build: $(DOCKERFILE) $(ROOTFS) $(AIRFLOW_CONF) $(ENTRYPOINT_SH)
cd $(BUILD_ROOT) && docker build -t $(IMAGE) . && docker tag $(IMAGE) $(ALIAS)

Expand All @@ -37,7 +61,7 @@ $(ROOTFS): $(BUILD_ROOT)

$(AIRFLOW_CONF): $(BUILD_ROOT)
mkdir -p $(shell dirname $(AIRFLOW_CONF))
cp config/airflow.cfg $(AIRFLOW_CONF)
cp config/airflow.cfg.in $(AIRFLOW_CONF)

$(ENTRYPOINT_SH): $(BUILD_ROOT)
mkdir -p $(shell dirname $(ENTRYPOINT_SH))
Expand Down
35 changes: 30 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,50 @@ This is useful when you'd want:
This repository contains:

* **Dockerfile(.template)** of [airflow](https://github.com/apache/incubator-airflow) for [Docker](https://www.docker.com/) images published to the public [Docker Hub Registry](https://registry.hub.docker.com/).
* **airflow.all.yaml** for creating Kubernetes services and deployments to run Airflow on Kubernetes
* **airflow.all.yaml** for manually creating Kubernetes services and deployments to run Airflow on Kubernetes
* **Helm Chart** for deployments using Helm

## Informations

* Highly inspired by the great work [puckel/docker-airflow](https://github.com/puckel/docker-airflow)
* Based on Debian Jessie official Image [debian:jessie](https://registry.hub.docker.com/_/debian/) and uses the official [Postgres](https://hub.docker.com/_/postgres/) as backend and [RabbitMQ](https://hub.docker.com/_/rabbitmq/) as queue
* Following the Airflow release from [Python Package Index](https://pypi.python.org/pypi/airflow)

## Installation
## Manual Installation

Create all the deployments and services for Airflow:

kubectl create -f airflow.all.yaml
kubectl create -f airflow.all.yaml

## Build
## Helm Deployment

Ensure Helm is installed and initialized; you may need to have `TILLER_NAMESPACE` set as an environment
variable.

Deploy to Kubernetes using:

make helm-install NAMESPACE=airflow

Remove from the cluster using:

make helm-uninstall

### Helm configuration customization

Helm allows you to override the configuration to adapt it to your environment. You will probably want to specify
your own ingress configuration, for instance.

Note:

Do NOT use characters such as " (double quote), ' (single quote), / (slash) or \ (backslash)
in your passwords.


## Build Docker image

`git clone` this repository and then just run:

make build
make build

## Usage

Expand Down
4 changes: 4 additions & 0 deletions airflow/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
apiVersion: v1
description: Airflow installation
name: airflow
version: v0.1.0
29 changes: 29 additions & 0 deletions airflow/templates/configmaps.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# ConfigMap holding the environment variables shared by the Airflow containers.
# The web, flower, scheduler and worker Deployments load it through
# `envFrom: configMapRef`, so every key below becomes an environment variable.
apiVersion: v1
kind: ConfigMap
metadata:
# Release-specific prefix + fixed suffix; the Deployments reference the same name.
name: "{{ .Values.airflow.prefix -}}airflow-cfg"
data:
AIRFLOW_HOME: "/usr/local/airflow"
# Only emitted when a fernet key is supplied in the values.
{{- if .Values.airflow.fernet_key }}
FERNET_KEY: "{{- .Values.airflow.fernet_key -}}"
{{- end }}
# Broker host name is built from the prefix plus the configured base name.
RABBITMQ_HOST: "{{- .Values.airflow.prefix -}}{{- .Values.db.rabbitmq.basename -}}"
# Credentials are rendered as "user:password", and only when a user is set.
# NOTE(review): the password is stored in plain text in a ConfigMap and is
# spliced into a double-quoted YAML string unescaped; a Secret may fit better.
{{- if .Values.db.rabbitmq.user }}
RABBITMQ_CREDS: "{{- .Values.db.rabbitmq.user -}}:{{- .Values.db.rabbitmq.password -}}"
{{- end }}
# Database host name, built the same way as the broker host name.
POSTGRES_HOST: "{{- .Values.airflow.prefix -}}{{- .Values.db.postgres.basename -}}"
# Same "user:password" convention (and same plain-text caveat) as RABBITMQ_CREDS.
{{- if .Values.db.postgres.user }}
POSTGRES_CREDS: "{{- .Values.db.postgres.user -}}:{{- .Values.db.postgres.password -}}"
{{- end }}
# Optional; presumably the retry count used by the image's entrypoint script
# while waiting for its backends - TODO confirm against script/entrypoint.sh.
{{- if .Values.airflow.init_retry_loop }}
TRY_LOOP: "{{ .Values.airflow.init_retry_loop -}}"
{{- end }}
# URL prefixes for the Flower and Airflow web UIs; always emitted.
FLOWER_URL_PREFIX: "{{ .Values.flower.url_prefix -}}"
AIRFLOW_URL_PREFIX: "{{ .Values.airflow.url_prefix -}}"
# Git-sync settings are only emitted when DAG synchronisation is enabled.
# This if/end pair does not use whitespace trimming, so it leaves blank lines
# in the rendered output.
{{ if .Values.dags.git_sync_enabled }}
GIT_SYNC_REPO: "{{ .Values.dags.git_repo }}"
GIT_SYNC_BRANCH: "{{ .Values.dags.git_branch }}"
GIT_SYNC_WAIT: "{{ .Values.dags.poll_interval_sec }}"
GIT_SYNC_DEBUG: "{{ .Values.dags.git_sync_debug }}"
{{ end }}
# Rendered as a quoted string because ConfigMap data values must be strings.
LOAD_DAGS_EXAMPLES: "{{ .Values.dags.load_examples }}"
155 changes: 155 additions & 0 deletions airflow/templates/deployments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
# Single-replica PostgreSQL Deployment (labelled tier: db) for the Airflow stack.
# NOTE(review): no volume is declared here, so data presumably does not survive
# a pod restart - confirm whether persistence is handled elsewhere.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: {{ .Values.airflow.prefix -}}postgres
spec:
replicas: 1
template:
metadata:
labels:
app: airflow
tier: db
spec:
containers:
- name: {{ .Values.airflow.prefix -}}postgres
# No tag is given, so the registry's default tag is pulled.
image: postgres
ports:
# NOTE(review): the port name includes the prefix; Kubernetes limits port
# names to 15 characters, so a long prefix may be rejected - verify.
- name: {{ .Values.airflow.prefix -}}postgres
containerPort: 5432
# User, password and database are taken from the same values that the
# ConfigMap uses to build POSTGRES_CREDS.
env:
- name: POSTGRES_USER
value: "{{ .Values.db.postgres.user -}}"
- name: POSTGRES_PASSWORD
value: "{{ .Values.db.postgres.password -}}"
- name: POSTGRES_DB
value: "{{ .Values.db.postgres.database -}}"
---
# Single-replica RabbitMQ Deployment (labelled tier: rabbitmq), using the
# image variant that bundles the management UI.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: {{ .Values.airflow.prefix -}}rabbitmq
spec:
replicas: 1
template:
metadata:
labels:
app: airflow
tier: rabbitmq
spec:
restartPolicy: Always
containers:
- name: {{ .Values.airflow.prefix -}}rabbitmq
image: rabbitmq:3-management
ports:
# 15672 is exposed under the name "management", 5672 under "node".
- name: {{ .Values.airflow.prefix -}}management
containerPort: 15672
- name: {{ .Values.airflow.prefix -}}node
containerPort: 5672
# Default user and password come from the same values that the ConfigMap uses
# to build RABBITMQ_CREDS; the vhost name is fixed to "airflow".
env:
- name: RABBITMQ_DEFAULT_USER
value: "{{ .Values.db.rabbitmq.user -}}"
- name: RABBITMQ_DEFAULT_PASS
value: "{{ .Values.db.rabbitmq.password -}}"
- name: RABBITMQ_DEFAULT_VHOST
value: "airflow"
---
# Single-replica Deployment (labelled tier: web) that runs the Airflow image
# with the "webserver" argument.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: {{ .Values.airflow.prefix -}}web
spec:
replicas: 1
template:
metadata:
labels:
app: airflow
tier: web
spec:
restartPolicy: Always
containers:
- name: {{ .Values.airflow.prefix -}}web
image: {{ .Values.airflow.image }}
imagePullPolicy: {{ .Values.airflow.image_pull_policy}}
# Import every key of the shared "<prefix>airflow-cfg" ConfigMap as an
# environment variable.
envFrom:
- configMapRef:
name: {{ .Values.airflow.prefix }}airflow-cfg
ports:
- name: {{ .Values.airflow.prefix -}}web
containerPort: 8080
# Passed to the image's entrypoint as the component to start.
args: ["webserver"]
---
# Single-replica Deployment (labelled tier: flower) that runs the Airflow image
# with the "flower" argument.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: {{ .Values.airflow.prefix -}}flower
spec:
replicas: 1
template:
metadata:
labels:
app: airflow
tier: flower
spec:
restartPolicy: Always
containers:
- name: {{ .Values.airflow.prefix -}}flower
image: {{ .Values.airflow.image }}
imagePullPolicy: {{ .Values.airflow.image_pull_policy}}
# FLOWER_PORT is set explicitly and matches the containerPort declared below.
env:
- name: FLOWER_PORT
value: "5555"
# Import every key of the shared "<prefix>airflow-cfg" ConfigMap as an
# environment variable.
envFrom:
- configMapRef:
name: {{ .Values.airflow.prefix }}airflow-cfg
ports:
- name: {{ .Values.airflow.prefix -}}flower
containerPort: 5555
# Passed to the image's entrypoint as the component to start.
args: ["flower"]
---
# Single-replica Deployment (labelled tier: scheduler) that runs the Airflow
# image with the "scheduler" argument. No container port is declared.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: {{ .Values.airflow.prefix -}}scheduler
spec:
replicas: 1
template:
metadata:
labels:
app: airflow
tier: scheduler
spec:
restartPolicy: Always
containers:
- name: {{ .Values.airflow.prefix -}}scheduler
image: {{ .Values.airflow.image }}
imagePullPolicy: {{ .Values.airflow.image_pull_policy}}
# volumes:
# - /localpath/to/dags:/usr/local/airflow/dags
# Import every key of the shared "<prefix>airflow-cfg" ConfigMap as an
# environment variable.
envFrom:
- configMapRef:
name: {{ .Values.airflow.prefix }}airflow-cfg
# "-n" receives the configured scheduler_num_runs value, quoted so it is passed
# as a string; presumably the number of scheduler runs before the process
# exits - TODO confirm against the Airflow CLI.
args: ["scheduler", "-n", "{{ .Values.airflow.scheduler_num_runs }}"]
---
# Deployment (labelled tier: worker) that runs the Airflow image with the
# "worker" argument. Unlike the other Deployments, its replica count is
# configurable.
apiVersion: extensions/v1beta1
kind: Deployment
metadata:
name: {{ .Values.airflow.prefix -}}worker
spec:
# Number of worker pods, taken from the celery.num_workers value.
replicas: {{ .Values.celery.num_workers }}
template:
metadata:
labels:
app: airflow
tier: worker
spec:
restartPolicy: Always
containers:
- name: {{ .Values.airflow.prefix -}}worker
image: {{ .Values.airflow.image }}
imagePullPolicy: {{ .Values.airflow.image_pull_policy}}
# volumes:
# - /localpath/to/dags:/usr/local/airflow/dags
# Import every key of the shared "<prefix>airflow-cfg" ConfigMap as an
# environment variable.
envFrom:
- configMapRef:
name: {{ .Values.airflow.prefix }}airflow-cfg
# Passed to the image's entrypoint as the component to start.
args: ["worker"]
Loading

0 comments on commit 904f312

Please sign in to comment.