"""Parse a fixed-length text file into columns using a JSON field layout.

The config JSON describes each field's name, 1-based start position, and
length; every field is carved out of the raw line with SQL ``substring``.
"""

from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

# Hard-coded job inputs; replace with CLI args or job parameters as needed.
CONFIG_PATH = "/path/to/config.json"
DATA_PATH = "/path/to/fixed-length-file.txt"


def main() -> None:
    """Run the fixed-length ETL job and display the parsed DataFrame."""
    spark = (
        SparkSession.builder
        .appName("FixedLengthETLTest")
        .getOrCreate()
    )

    # Load the field layout from JSON and take the first (only) row.
    # NOTE(review): assumes the config has a top-level "fields" array of
    # {name, startPosition, length} objects — confirm against the file.
    config = spark.read.json(CONFIG_PATH).collect()[0]

    # Each input line arrives as a single string column named "value".
    raw_df = spark.read.text(DATA_PATH)

    # Build every field column in ONE select instead of chained
    # withColumn calls in a loop — each withColumn adds a projection to
    # the logical plan and bloats analysis time for wide layouts.
    # SQL substring() is 1-based, matching fixed-length layout specs.
    parsed_df = raw_df.select(
        "value",  # keep the raw line alongside the parsed fields
        *[
            expr(
                f"substring(value, {field.startPosition}, {field.length})"
            ).alias(field.name)
            for field in config.fields
        ],
    )

    parsed_df.show()


if __name__ == "__main__":
    main()