#!/usr/bin/perl
# Convert CSV to TAB delimited
# 2002-07-10 JPV
#
# History:
#   v2.0   2002-10-16 JPV Had to re-write, as the MRE RegEx was not
#          correctly parsing escaped quotes ( "" ) within quoted strings.
#          This is about 5 times slower than the RegEx, but it parses right.
#   v2.1   2002-12-23 JPV Bug fixes for OUTFILE and separators.
#   v3.0   2002-12-30 JPV Changed back to regex (MRE2) from quotewords.
#   v3.1   2003-01-02 JPV Fixed extra trailing tab on each line
#   v3.2   2003-03-21 JPV Added -d
#   v3.2a  2003-04-01 JPV Annoying bugfix for -h checking
#
# Original RegEx from _Mastering_Regular_Expressions_ and
#   Eric Soulliard 2002-04-01
# Text::ParseWords/quotewords is from _Perl_Cookbook_ 1.15 pg 31
# Other code from csv2html.pl (Eric Soulliard)
# See sub parse_csv_mre2 for details about the regex to parse CSV.
##########################################################################

use strict;
use warnings;
use Getopt::Std;    # Use Perl5 built-in program argument handler

my $ver = "v3.2a";

# Program name for messages: $0 with everything up to the last "\" or "/"
# removed.
(my $myname = $0) =~ s{^.*[/\\]}{};

my $Greeting = "$myname $ver Copyright 2002 JP Vossen (http://www.jpsdomain.org/)\n";
$Greeting .= " Licensed under the GNU GENERAL PUBLIC LICENSE:\n";
$Greeting .= " See http://www.gnu.org/copyleft/gpl.html for full text and details.\n";

# Run the converter only when executed as a script; when the file is pulled
# in with require/do (e.g. by a test), only the subs are defined.
main() unless caller;

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# main: command-line entry point.
#
# Reads options from @ARGV (-i infile, -o outfile, -d delimiter, -q),
# converts every input line with parse_csv_mre2 and writes the fields joined
# by the delimiter (TAB by default) followed by a newline.
# Dies with a message on open/close failure; dies with "\n" after printing
# the usage text when help is requested.  Returns 0 on success.
sub main {
    # Help is checked before getopts, as getopts does not know -h/--help/-?.
    if (_wants_help(@ARGV)) {
        print STDERR ("\n$Greeting\n");
        print STDERR <<"EoN";    # Various usage notes
Usage: $myname (-i {infile}) (-o {outfile}) (-q)

    -i {infile} = Use infile as the input file, otherwise use STDIN.
    -o {outfile} = Use outfile as the output file, otherwise use STDOUT.
    -d {delimiter} = Use the specificed delimiter instead of TAB.
    -q = Be quiet about it.
EoN
        die("\n");
    }

    my %opt;
    getopts('i:o:d:q', \%opt);    # Define possible args.

    # Missing or empty option values fall back to the defaults; a literal
    # "0" is accepted as a real file name / delimiter.
    my $infile    = _value_or($opt{i}, "-");     # "-" means STDIN
    my $outfile   = _value_or($opt{o}, "-");     # "-" means STDOUT
    my $delimiter = _value_or($opt{d}, "\t");    # TAB unless -d given

    # Three-argument open: the file name can never be interpreted as a mode
    # or a pipe.  "-" is mapped to the standard handles explicitly because
    # three-argument open does not treat it specially.
    my $in_fh;
    if ($infile eq "-") {
        $in_fh = \*STDIN;
    }
    else {
        open($in_fh, '<', $infile)
            or die("$myname: error opening $infile for input: $!\n");
    }

    my $out_fh;
    if ($outfile eq "-") {
        $out_fh = \*STDOUT;
    }
    else {
        open($out_fh, '>', $outfile)
            or die("$myname: error opening $outfile for output: $!\n");
    }

    if (!$opt{q}) { print STDERR ("\n$Greeting\n"); }

    while (my $aline = <$in_fh>) {
        chomp($aline);
        my @arecord = parse_csv_mre2($aline);
        print {$out_fh} join($delimiter, @arecord), "\n";
    }

    # Buffered write errors (e.g. disk full) only surface at close.
    if ($outfile ne "-") {
        close($out_fh)
            or die("$myname: error closing $outfile after output: $!\n");
    }
    if ($infile ne "-") {
        close($in_fh);
    }

    if (!$opt{q}) {
        # $^T is the time the script started.
        print STDERR ("\n\a$myname finished in ", time() - $^T, " seconds.\n");
    }

    return 0;
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# _wants_help: true when any argument asks for help.
#
# An argument counts when it contains "?", starts with "-h", or contains
# "--help".  Arguments are checked one by one so that "-h" is recognised
# even when it is the first (or only) argument.
sub _wants_help {
    my @args = @_;
    return scalar grep { /\?/ || /^-h/ || /--help/ } @args;
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# _value_or: return $value unless it is undefined or the empty string, in
# which case return $default.
sub _value_or {
    my ($value, $default) = @_;
    return (defined $value && length $value) ? $value : $default;
}

#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# parse_csv_mre2: split one line of CSV text into its fields.
#
# Called like: @arecord = parse_csv_mre2($aline);
#
# Parameter: the line to parse, without its trailing newline.
# Returns:   the list of fields.  Surrounding double quotes are removed from
#            quoted fields and each doubled quote ("") inside them becomes a
#            single quote.  An undefined or empty line yields an empty list.
#
# Regex to parse CSV from _Mastering_Regular_Expressions,_Second_Edition_;
# page 271.  See http://regex.info/ esp. http://regex.info/dlisting.cgi?id=1253
#
# NOTE(review): parsing stops at the first position that is neither a comma
# nor the start of the line (e.g. a stray quote inside an unquoted field);
# the fields after that point are not returned.
sub parse_csv_mre2 {
    my ($line) = @_;

    return () if !defined $line || $line eq '';

    my @parsedline = ();

    # \G continues each match where the previous one ended, so the loop walks
    # the line field by field.
    while ($line =~ m{
            \G(?:^|,)
            (?:
                # Either a double-quoted field (with "" for each ")...
                "                                   # field's opening quote
                ( (?> [^"]* ) (?> "" [^"]* )* )
                "                                   # field's closing quote
              |
                # ...or some non-quote/non-comma text.
                ( [^",]* )
            )
        }gx)
    {
        my $field;
        if (defined $2) {
            # Got some non-quote/non-comma text
            $field = $2;
        }
        else {
            # Got a quoted field: collapse escaped quotes
            $field = $1;
            $field =~ s/""/"/g;
        }
        push(@parsedline, $field);
    }

    return (@parsedline);
}

1;    # true value so the file can also be loaded with require