This repository has been archived by the owner on Aug 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add scripts for setting up hdfs (#738)
* Add scripts for setting up hdfs Co-authored-by: Bin Wang <[email protected]>
- Loading branch information
Showing
7 changed files
with
177 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#!/usr/bin/env python3 | ||
import os | ||
from argparse import ArgumentParser | ||
from configparser import ConfigParser | ||
from tempfile import NamedTemporaryFile | ||
|
||
try: | ||
import ansible_runner | ||
except ModuleNotFoundError: | ||
import textwrap | ||
import sys | ||
print(textwrap.dedent("""\ | ||
This script requires ansible and ansible-runner. | ||
You can install these modules using the following command: | ||
pip install --user ansible ansible-runner | ||
"""), file=sys.stderr) | ||
sys.exit(-1) | ||
|
||
from hillviewCommon import get_config | ||
|
||
# Section names of the generated inventory file: one section for the
# namenode host, one for the datanode hosts, and one that receives the
# variables written for the inventory.
NAMENODE = "namenode"
DATANODE = "datanode"
DEFAULT_VARS = "all:vars"

# Specifies which hadoop version to use; written to the inventory as the
# "hadoop_version" variable.
HADOOP_VERSION = "3.3.1"
|
||
|
||
def write_inventory_file(config, file):
    """
    Write an INI-style ansible inventory describing the HDFS cluster.

    The webserver node becomes the namenode, every worker becomes a datanode,
    and the ssh user and the hadoop version are written as inventory variables.
    :param config: Cluster configuration; must provide get_webserver(),
                   get_workers() and get_user().
    :param file: Writable text file object. It is flushed before returning so
                 the content can be read through the file's path right away.
    """
    # interpolation=None: values are stored verbatim, so a "%" inside a value
    # is not rejected as malformed interpolation syntax.
    inventory = ConfigParser(allow_no_value=True, interpolation=None)
    # Hosts are stored as value-less options and ConfigParser lower-cases
    # option names by default; keep them exactly as the configuration has them.
    inventory.optionxform = str

    # use the webserver node as namenode
    inventory.add_section(NAMENODE)
    inventory.set(NAMENODE, config.get_webserver().host)

    # use the workers as datanodes
    inventory.add_section(DATANODE)
    for worker in config.get_workers():
        inventory.set(DATANODE, worker.host)

    inventory.add_section(DEFAULT_VARS)
    inventory.set(DEFAULT_VARS, "ansible_user", config.get_user())
    inventory.set(DEFAULT_VARS, "hadoop_version", HADOOP_VERSION)

    inventory.write(file)
    file.flush()
|
||
|
||
def get_deployment_dir():
    """
    Locate the deployment folder that contains the needed ansible files.

    Assumes this script lives in a direct subfolder of the project root and
    that the project root contains a "deployment" folder. Symbolic links in
    the script's path are resolved first, so the folder is still found when
    the script is started through a symlink.
    :return: The absolute path to the deployment folder.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))
    project_root = os.path.dirname(script_dir)
    return os.path.join(project_root, "deployment")
|
||
|
||
if __name__ == "__main__":
    import sys

    parser = ArgumentParser()
    parser.add_argument("config", help="json cluster configuration file")
    args = parser.parse_args()
    config = get_config(parser, args)

    # The inventory is only needed while the playbook runs; the temporary
    # file disappears as soon as the with-block is left.
    with NamedTemporaryFile(mode="w") as inventory_file:
        write_inventory_file(config, inventory_file)
        result = ansible_runner.run(
            project_dir=get_deployment_dir(),
            inventory=inventory_file.name,
            playbook="install-hdfs.yml"
        )

    # ansible_runner.run() reports a failed playbook through the returned
    # runner object instead of raising, so pass the failure on as the exit
    # status of this script.
    if result.rc != 0:
        print(f"install-hdfs.yml did not succeed (status: {result.status})",
              file=sys.stderr)
        sys.exit(result.rc or 1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[defaults] | ||
host_key_checking = false | ||
interpreter_python = /usr/bin/python3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
--- | ||
|
||
# Install hadoop into the remote user's home directory on every node and
# write the HDFS configuration files.
- hosts: all
  tasks:
    - name: Download hadoop
      get_url:
        url: "https://mirrors.ocf.berkeley.edu/apache/hadoop/common/hadoop-{{ hadoop_version }}/hadoop-{{ hadoop_version}}.tar.gz"
        dest: "/tmp/hadoop-{{ hadoop_version }}.tar.gz"

    # HADOOP_HOME is referenced by the tasks below, by the templates and by
    # the namenode play.
    - name: Remember the hadoop installation directory
      set_fact: HADOOP_HOME="{{ ansible_env.HOME }}/hadoop-{{ hadoop_version }}"

    - name: Unarchive hadoop
      unarchive:
        src: "/tmp/hadoop-{{ hadoop_version }}.tar.gz"
        dest: "{{ ansible_env.HOME }}"
        remote_src: true
        # skip the extraction when the installation directory already exists
        creates: "{{ HADOOP_HOME }}"

    - name: Set HADOOP_HOME environment variable
      lineinfile:
        path: "{{ ansible_env.HOME }}/.profile"
        line: "export HADOOP_HOME=$HOME/hadoop-{{ hadoop_version }}"
        # do not fail on accounts that have no ~/.profile yet
        create: true

    - name: Set PATH environment variable
      lineinfile:
        path: "{{ ansible_env.HOME }}/.profile"
        line: "export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH"
        create: true

    # A login shell is used so that JAVA_HOME is read from the user's profile;
    # the play stops here when JAVA_HOME is not set on the node.
    - name: Get JAVA_HOME
      shell: bash -lc 'echo $JAVA_HOME'
      register: echo_java_home
      changed_when: false
      failed_when: echo_java_home.stdout == ""

    # Replaces the commented-out JAVA_HOME placeholder of hadoop-env.sh.
    - name: Set JAVA_HOME for hadoop environment
      lineinfile:
        path: "{{ HADOOP_HOME }}/etc/hadoop/hadoop-env.sh"
        regexp: "^# export JAVA_HOME=$"
        line: "export JAVA_HOME={{ echo_java_home.stdout }}"

    - name: Write workers config
      template:
        src: templates/workers.j2
        dest: "{{ HADOOP_HOME }}/etc/hadoop/workers"

    - name: Write core-site.xml
      template:
        src: templates/core-site.xml.j2
        dest: "{{ HADOOP_HOME }}/etc/hadoop/core-site.xml"

    - name: Write hdfs-site.xml
      template:
        src: templates/hdfs-site.xml.j2
        dest: "{{ HADOOP_HOME }}/etc/hadoop/hdfs-site.xml"
|
||
# Format the file system on the first run and start HDFS from the namenode.
- hosts: namenode
  tasks:
    - name: Format hdfs if it hasn't been formatted
      shell:
        # -nonInteractive: abort instead of asking for confirmation, because
        # nobody can answer a prompt while the command runs under ansible.
        cmd: "{{ HADOOP_HOME }}/bin/hdfs namenode -format -nonInteractive"
        # the VERSION file marks an already formatted namenode directory
        creates: "{{ HADOOP_HOME }}/data/namenode/current/VERSION"

    # Only probes whether HDFS answers; the exit code is evaluated by the
    # next task, so this task itself never fails.
    - name: Check hdfs status
      shell: "{{ HADOOP_HOME }}/bin/hdfs dfsadmin -report"
      register: hdfs_report
      changed_when: false
      failed_when: false

    - name: Start hdfs
      shell: "{{ HADOOP_HOME }}/sbin/start-dfs.sh"
      when: hdfs_report.rc != 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | ||
|
||
<configuration> | ||
<property> | ||
<name>fs.defaultFS</name> | ||
<value>hdfs://{{ hostvars[groups['namenode'][0]]['ansible_default_ipv4']['address'] }}:9000</value> | ||
</property> | ||
</configuration> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> | ||
|
||
<configuration> | ||
<property> | ||
<name>dfs.namenode.name.dir</name> | ||
<value>{{ HADOOP_HOME }}/data/namenode</value> | ||
</property> | ||
<property> | ||
<name>dfs.datanode.data.dir</name> | ||
<value>{{ HADOOP_HOME }}/data/datanode</value> | ||
</property> | ||
<property> | ||
<name>dfs.replication</name> | ||
<value>2</value> | ||
</property> | ||
</configuration> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{% for node in groups["datanode"] %} | ||
{{ node }} | ||
{% endfor %} |