#!/bin/bash # # Many third-party data sources do lazy character column definitions, making them varchar(65535), which is stoopid. # This little script will read in the DDL for a table, parse out the columns defined like this and then hork out a # SQL script that discovers the actual max length of the actual data for more efficient DDL. Suggested use is to # run this script and output it to a xxxx.sql file, then \i that file in Redshift while in the correct schema. # # Assumes tablename will be passed on the command line and a file, tablename.sql will exist in this dir. TABLENAME=${1} if [ "${TABLENAME}" == "" ] then echo -e "\nUSAGE: $0 redshiftTableName\n\nExpects redshiftTableName.sql to exist in this directory.\n\n" exit 1 fi COLNAMES=`grep 65535 ${TABLENAME}.sql | cut -f2 -d","|cut -f1 -d" "` for COL in ${COLNAMES} do echo "SELECT MAX(LEN(${COL})) AS max_${COL} FROM ${TABLENAME};" done