"""PySparkWordCount.py"""
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

# Input location: the glob makes Spark read every file under /word_count/.
textFile = "/word_count/*"  # text files
spark = SparkSession.builder.appName("PySparkWordCount").getOrCreate()

# Everything that can fail runs inside try/finally so the session is always
# stopped, even when the input path is missing or a job aborts.
try:
    # spark.read.text yields one row per input line in a single string
    # column named "value". The frame feeds exactly one action below, so it
    # is not cached: caching would only cost executor memory.
    textData = spark.read.text(textFile)

    # split() interprets its pattern as a regular expression. Splitting on
    # runs of whitespace (instead of one literal space) treats tabs and
    # repeated spaces as a single separator.
    split_col = split(textData.value, r"\s+")
    exp_col = explode(split_col).alias("exploded")

    # Blank lines and leading/trailing whitespace still produce empty-string
    # tokens; drop them so "" is not reported as a word.
    words = textData.select(exp_col).where(col("exploded") != "")
    group_data = words.groupBy("exploded")
    count_frame = group_data.count()

    # Stream the result to the driver partition by partition rather than
    # materialising every distinct word in driver memory at once.
    for row in count_frame.toLocalIterator():
        print("%s, %s" % (row['exploded'], row['count']))
finally:
    spark.stop()