Show actual maximum column lengths in Redshift


#!/bin/bash
#
# Many third-party data sources do lazy character column definitions, making them varchar(65535), which is stoopid.
# This little script will read in the DDL for a table, parse out the columns defined like this and then hork out a
# SQL script that discovers the actual max length of the actual data for more efficient DDL.  Suggested use is to
# run this script and output it to a xxxx.sql file, then \i that file in Redshift while in the correct schema.
#
# Assumes tablename will be passed on the command line and a file, tablename.sql will exist in this dir.

TABLENAME=${1}

if [ "${TABLENAME}" == "" ]
then
	echo -e "\nUSAGE: $0 redshiftTableName\n\nExpects redshiftTableName.sql to exist in this directory.\n\n"
	exit 1
fi

COLNAMES=`grep 65535 ${TABLENAME}.sql | cut -f2 -d","|cut -f1 -d" "`

for COL in ${COLNAMES}
do
 echo "SELECT MAX(LEN(${COL})) AS max_${COL}  FROM ${TABLENAME};"
done