from pyspark.sql import SparkSession

# Reuse the active SparkSession if one exists, otherwise start a new one.
spark = SparkSession.builder.getOrCreate()

# Raw student records, one tuple per student. Field order must match
# the `columns` list below:
# (StudentID, Name, Age, Height, Score1, Score2, Score3, Score4, Course, Department)
d = [
    (12114, 'Anne', 21, 1.56, 8, 9, 10, 9, 'Economics', 'SC'),
    (13007, 'Adrian', 23, 1.82, 6, 6, 8, 7, 'Economics', 'SC'),
    (10045, 'George', 29, 1.77, 10, 9, 10, 7, 'Law', 'SC'),
    (12459, 'Adeline', 26, 1.61, 8, 6, 7, 7, 'Law', 'SC'),
    (10190, 'Mayla', 22, 1.67, 7, 7, 7, 9, 'Design', 'AR'),
    (11552, 'Daniel', 24, 1.75, 9, 9, 10, 9, 'Design', 'AR'),
]

# Column names for the DataFrame, positionally matching the tuples above.
columns = [
    'StudentID', 'Name', 'Age', 'Height', 'Score1',
    'Score2', 'Score3', 'Score4', 'Course', 'Department',
]

# Build the DataFrame; Spark infers column types from the Python values.
students = spark.createDataFrame(d, columns)

# truncate=False prints full cell contents instead of clipping at 20 chars.
students.show(truncate=False)

# Expected output:
# +---------+-------+---+------+------+------+------+------+---------+----------+
# |StudentID|Name   |Age|Height|Score1|Score2|Score3|Score4|Course   |Department|
# +---------+-------+---+------+------+------+------+------+---------+----------+
# |12114    |Anne   |21 |1.56  |8     |9     |10    |9     |Economics|SC        |
# |13007    |Adrian |23 |1.82  |6     |6     |8     |7     |Economics|SC        |
# |10045    |George |29 |1.77  |10    |9     |10    |7     |Law      |SC        |
# |12459    |Adeline|26 |1.61  |8     |6     |7     |7     |Law      |SC        |
# |10190    |Mayla  |22 |1.67  |7     |7     |7     |9     |Design   |AR        |
# |11552    |Daniel |24 |1.75  |9     |9     |10    |9     |Design   |AR        |
# +---------+-------+---+------+------+------+------+------+---------+----------+