#!/bin/bash
# File build.sh of Package spark
#
# Builds Apache Spark (Scala 2.10, Hive support) from the unpacked source
# tree using the bundled Maven and an offline local repository.
# Required environment: VERSION (Spark version of the source directory).
set -xe
# Absolute path of the package root; kit/ (maven, m2 repo) lives under it.
# $() instead of backticks (SC2006); captured before we cd into the source tree.
PROJECT_PREFIX=$(readlink -e .)
cd "src/spark-${VERSION}"
# Maven needs extra heap/codecache for the Spark build.
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"
# Rewrite the POMs for Scala 2.10 before invoking Maven.
dev/change-scala-version.sh 2.10
# Offline (-o) Maven build against the vendored local repo; checksums are
# enforced so a corrupted kit/m2 fails loudly. Tests are skipped — this is
# a packaging build. All path expansions quoted (SC2086).
"$PROJECT_PREFIX/kit/apache-maven-3.3.9/bin/mvn" \
  -Dmaven.repo.local="$PROJECT_PREFIX/kit/m2" \
  --settings "$PROJECT_PREFIX/kit/m2/settings.xml" \
  --strict-checksums \
  -o -DskipTests -Phive -Phive-thriftserver -Dscala-2.10 install
# The following script was copied from make-distribution.sh in the Spark source repository
SPARK_HOME="$(pwd)"
DISTDIR="$SPARK_HOME/dist"
SPARK_HIVE=1
# Make directories: start from a clean dist/ tree on every run.
rm -rf "$DISTDIR"
mkdir -p "$DISTDIR/jars"
echo "Spark $VERSION$GITREVSTRING built for Hadoop $SPARK_HADOOP_VERSION" > "$DISTDIR/RELEASE"
# $* (not $@) joins all build flags into a single word inside the string (SC2145).
echo "Build flags: $*" >> "$DISTDIR/RELEASE"
# Copy jars
cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/"
# Copy extra dependencies from the vendored m2 repository. Addressed via
# $PROJECT_PREFIX/kit/m2 for consistency with the mvn invocation above
# ($SPARK_HOME/../../kit resolves to the same directory).
cp "$PROJECT_PREFIX/kit/m2/com/yammer/metrics/metrics-core/2.2.0/metrics-core-2.2.0.jar" "$DISTDIR/jars/"
cp "$PROJECT_PREFIX/kit/m2/org/apache/kafka/kafka_2.10/0.8.2.1/kafka_2.10-0.8.2.1.jar" "$DISTDIR/jars/"
# Only create the yarn directory if the yarn artifacts were build.
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then
mkdir "$DISTDIR/yarn"
cp "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar "$DISTDIR/yarn"
fi
# Copy examples and dependencies
mkdir -p "$DISTDIR/examples/jars"
cp "$SPARK_HOME"/examples/target/scala*/jars/* "$DISTDIR/examples/jars"
# Deduplicate jars that have already been packaged as part of the main Spark dependencies.
for f in "$DISTDIR"/examples/jars/*; do
  # Guard against an unmatched glob: without nullglob, $f would be the
  # literal pattern "…/examples/jars/*" if the directory were empty.
  [ -e "$f" ] || continue
  name=$(basename "$f")
  if [ -f "$DISTDIR/jars/$name" ]; then
    rm "$DISTDIR/examples/jars/$name"
  fi
done
# Copy example sources (needed for python and SQL).
# The target main/ dir is pre-created; cp -r then merges the source
# contents into it.
mkdir -p "$DISTDIR/examples/src/main"
cp -r "$SPARK_HOME/examples/src/main" "$DISTDIR/examples/src/"
# Copy license and ASF files into the distribution root.
for asf_file in LICENSE NOTICE; do
  cp "$SPARK_HOME/$asf_file" "$DISTDIR"
done
cp -r "$SPARK_HOME/licenses" "$DISTDIR"
# CHANGES.txt is not present in every Spark release — copy it only if it exists.
if [[ -e "$SPARK_HOME/CHANGES.txt" ]]; then
  cp "$SPARK_HOME/CHANGES.txt" "$DISTDIR"
fi
# Copy data files into the distribution root.
cp -r "$SPARK_HOME/data" "$DISTDIR"
# Copy other things: config templates, README, and the runnable trees.
mkdir "$DISTDIR/conf"
cp "$SPARK_HOME"/conf/*.template "$DISTDIR/conf"
cp "$SPARK_HOME/README.md" "$DISTDIR"
for dist_subdir in bin python; do
  cp -r "$SPARK_HOME/$dist_subdir" "$DISTDIR"
done
# This file is required for command-line spark scripts to work
touch "$DISTDIR/RELEASE"