Was ist der einfachste Weg, eine Excel-Datei in Perl zu parsen? Die Umwandlung in eine Textdatei würde auch funktionieren.
Antworten
Zu viele Anzeigen?Der beste Weg ist die Verwendung von Tabellenkalkulation::ParseExcel .
Hier ist ein Beispiel:
#!/usr/bin/perl -w
use strict;
use warnings;
use Spreadsheet::ParseExcel;
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');
for my $worksheet ( $workbook->worksheets() ) {
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
for my $row ( $row_min .. $row_max ) {
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col );
next unless $cell;
print "Row, Col = ($row, $col)\n";
print "Value = ", $cell->value(), "\n";
print "Unformatted = ", $cell->unformatted(), "\n";
print "\n";
}
}
}
Um eine Excel-Datei mit Perl in Text zu konvertieren, würde ich empfehlen excel2txt das Spreadsheet::ParseExcel verwendet.
Ich hatte großes Glück mit Tabellenkalkulation::ParseExcel .
Am besten wäre es, wenn Sie ein Modul aus unserem geliebten CPAN .
Wann immer Sie haben ein Problem Ich frage mich sofort: Warum habe ich CPAN noch nicht überprüft?
Welches Modul das ist, kann ich nicht genau sagen, aber hier ist eine Liste, mit der Sie anfangen können, probieren Sie sie aus und sehen Sie, was funktioniert.
@echo off
ECHO CHECK THE VERSION FROM THE PROJECT
echo see also this [link][1]
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
ECHO The _Version is %_Version%
ECHO remove the output html files
del *.html /q
ECHO remove the output log files
del *.log /q
::pause
ECHO %0 > %0.log
ECHO %0.error.log >%0.error.log
set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
echo BaseDir is %BaseDir% 1>>%0.log 2>>%0.error.log
ECHO.
set LogLevel=3
echo LogLevel is %LogLevel% 1>>%0.log 2>>%0.error.log
ECHO.
::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
SET ExcelFileToParse="%BaseDir%\TODO.xls"
echo ExcelFileToParse is %ExcelFileToParse% 1>>%0.log 2>>%0.error.log
echo.
set OutputDir=%BaseDir%
echo OutputDir is %Outputdir% 1>>%0.log 2>>%0.error.log
echo.
ECHO SET THE UNICODE FOR PERL FOR UTF-8
SET PERL_UNICODE=S
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%
::set PerlScript=parseExcelToCsv.pl
set PerlScript=ExcelToHtml.pl
echo PerlScript is %PerlScript% 1>>%0.log 2>>%0.error.log
ECHO.
echo Action !!! 1>>%0.log 2>>%0.error.log
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log
%0.error.log
%0.log
::debug pause
use strict;
use Spreadsheet::ParseExcel;
use utf8 ;
use Encode ;
package ExcelToHtml ;
my $DebugLevel = 3 ;
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
my $minimumAmountOfCommandLineArgs = 3 ;
my ( $ExcelFileToParse , $OutputDir , $BaseFileName ) = ();
sub main {
logMsg("$0 SCRIPT START " , 1 );
#Action !!! call here methods
checkCommandLineArgs();
buildNames ();
ParseExcelAndlogMsgFiles ( $ExcelFileToParse , $BaseFileName , $OutputDir) ;
logMsg("$0 SCRIPT STOP " , 1 );
} #eof MAIN
sub buildNames
{
$ExcelFileToParse = "$ARGV[1]" ;
$OutputDir="$ARGV[2]" ;
$BaseFileName = $ExcelFileToParse ;
logMsg( "\$ExcelFileToParse is $ExcelFileToParse" ) ;
logMsg ( "\$OutputDir is $OutputDir" ) ;
logMsg ("\$BaseFileName is $BaseFileName \n" );
$BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/; #strip the directory part
logMsg ("\$BaseFileName is $BaseFileName ");
$BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ; #strip the file extension
logMsg ( "\$BaseFileName is $BaseFileName ");
} #eof sub buildNames
sub ParseExcelAndlogMsgFiles {
my $ExcelFileToParse = shift ;
my $BaseFileName = shift ;
my $OutputDir = shift ;
my $strToReturn = "";
my $parser = Spreadsheet::ParseExcel->new();
my $workbook = $parser->Parse("$ExcelFileToParse");
for my $worksheet ( $workbook->worksheets() ) {
my ( $row_min, $row_max ) = $worksheet->row_range();
my ( $col_min, $col_max ) = $worksheet->col_range();
for my $row ( $row_min .. $row_max ) {
my $rowStr = "" ;
for my $col ( $col_min .. $col_max ) {
my $cell = $worksheet->get_cell( $row, $col );
next unless $cell;
logMsg ( " Row, Col = ($row, $col)") ;
logMsg ( "\$cell->encoding() is " . $cell->encoding()) ;
logMsg (" Value = ", $cell->value() );
$rowStr .= makeCell($cell->value() ) ; #The Value
logMsg ( "Unformatted = ", $cell->unformatted()) ;
} #eof col
$rowStr = makeRow( $rowStr );
$strToReturn .= $rowStr ;
} #eof for my row
my $worksheetName = $worksheet->{'Name'} ;
my $FileSheet = "$OutputDir/$BaseFileName" . '.' . $worksheetName . '.' . 'html' ;
$strToReturn = makeTable ( $strToReturn );
$strToReturn = makeFile ( $strToReturn );
logMsg ( "\$strToReturn is $strToReturn " ) ;
PrintToFile ( $FileSheet, $strToReturn ) ;
$strToReturn = "" ;
$FileSheet = "" ;
} #eof for my worksheet
} #eof sub
sub trim
{
$_[0]=~s/^\s+//;
$_[0]=~s/\s+$//;
return $_[0];
}
# =========================================== eof sub trim
# Action !!!
sub makeRow
{
my $row = shift ;
return "<tr>" . $row . "</tr> \n" ;
}
sub makeCell
{
my $cell = shift ;
return "<td>" . "$cell" . " </td>" ;
}
sub makeTable
{
my $table = shift ;
return "\n <table> " . $table . "</table> \n" ;
}
sub makeFile
{
my $file = shift ;
$file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ;
return $file ;
}
sub checkCommandLineArgs
{
if ( @ARGV != $minimumAmountOfCommandLineArgs )
{
logErrorMsg ( "Not enougn command line args supplied exit 1" , 1 );
die "usage: $0 1 args \n" ;
exit 1 ;
} #eof if
else
{
foreach (@ARGV) { logMsg ( "$_ " ) ; }
} #eof else if ( @ARGV != 4 )
} #eof sub checkCommandLineArgs
#log a message according to the DebugLevel
sub logMsg
{
my $msg = shift ;
my $importance = shift ;
my $CurrentPerlScriptName = "$0" ;
my $niceMonth = GetANiceMonth ();
my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" . '.log' ;
$msg = GetANiceTime () .' --- ' . $msg . " \n" ;
if ( $importance == 1 )
{
$msg = "\n============================================================ \n" . $msg ;
$msg = $msg . "============================================================ \n" ;
}
#debug nothing
if ( $DebugLevel == 0 ) { return ; }
#just logMsg the message
if ( $DebugLevel == 1 )
{
logMsg ( $msg );
} #eof if ( $DebugLevel == 1 )
#logMsg the message in
if ( $DebugLevel == 2 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n";
print LOG $msg ;
close LOG;
}
#logMsg the message in
if ( $DebugLevel == 3 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n";
print LOG $msg ;
close LOG;
print $msg ;
} #eof if ( $DebugLevel == 3 )
} #eof sub logMsg
#log a message according to the DebugLevel
sub logErrorMsg
{
my $errorMsg = shift ;
my $importance = shift ;
my $CurrentPerlScriptName = "$0" ;
my $niceMonth = GetANiceMonth ();
my $errorLogFile = "$CurrentPerlScriptName" . "_" . "$niceMonth" . '.error.log' ;
$errorMsg = GetANiceTime () .' --- ' . $errorMsg . " \n" ;
if ( $importance == 1 )
{
$errorMsg = "\n============================================================ \n" . $errorMsg ;
$errorMsg= $errorMsg. "============================================================ \n" ;
}
#debug nothing
if ( $DebugLevel == 0 ) { return ; }
#just logMsg the message
if ( $DebugLevel == 1 ) { print $errorMsg ; }
#logMsg the message in a error log file
if ( $DebugLevel == 2 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n";
print ERRLOG $errorMsg ;
close ERRLOG;
}
#logMsg the message in
if ( $DebugLevel == 3 )
{
#READ ALL ROWS OF A FILE TO ALIST
open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n";
print ERRLOG $errorMsg ;
close ERRLOG;
print $errorMsg ;
} #eof if ( $DebugLevel == 3 )
} #eof sub logErrorMsg
#GET A NICE TIME
sub GetANiceTime {
# Purpose: returns the time in yyyymmdd-format
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#---- change 'month'- and 'year'-values to correct format ----
$min = "0$min" if ($min < 10);
$hour = "0$hour" if ($hour < 10);
$mon = $mon + 1;
$mon = "0$mon" if ($mon < 10);
$year = $year + 1900;
$mday = "0$mday" if ($mday < 10);
return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec";
} #eof sub GetANiceTime
sub GetANiceMonth {
# Purpose: returns the time in yyyymmdd-format
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);
#---- change 'month'- and 'year'-values to correct format ----
$mon = $mon + 1;
$mon = "0$mon" if ($mon < 10);
$year = $year + 1900;
return "$year\.$mon" ;
} #eof sub GetANiceTime
sub PrintToFile {
my $FileOutput = shift ;
my $StringToPrint = shift ;
#READ ALL ROWS OF A FILE TO ALIST
open (FILEOUTPUT, ">$FileOutput") ||
print "could not open the \$FileOutput $FileOutput!\n";
print FILEOUTPUT $StringToPrint ;
close FILEOUTPUT ;
#debug $strToReturn .= $StringToPrint;
}
# =========================================== eof sub PrintToFile
#Action !!!
main();
1 ;
__END__
#VersionHistory:
#1.0. --- Yordan Georgiev --- Initial creation
#1.1 --- Yordan Georgiev --- Added conditional logging
http://search.cpan.org/dist/Spreadsheet-Read/Read.pm
Spreadsheet::Read versucht, transparent zu lesen jede Tabellenkalkulation und gibt deren Inhalt auf universelle Weise zurück, unabhängig von dem Parsing-Modul, das die eigentliche Tabellenkalkulation scannt.