12 Stimmen

Was ist der beste Weg, um Excel-Dateien in Perl zu parsen?

Was ist der einfachste Weg, eine Excel-Datei in Perl zu parsen? Die Umwandlung in eine Textdatei würde auch funktionieren.

26voto

jmcnamara Punkte 33232

Der beste Weg ist die Verwendung von Tabellenkalkulation::ParseExcel .

Hier ist ein Beispiel:

#!/usr/bin/perl -w

use strict;
use warnings;

use Spreadsheet::ParseExcel;

my $parser   = Spreadsheet::ParseExcel->new();
my $workbook = $parser->parse('Book1.xls');

for my $worksheet ( $workbook->worksheets() ) {

    my ( $row_min, $row_max ) = $worksheet->row_range();
    my ( $col_min, $col_max ) = $worksheet->col_range();

    for my $row ( $row_min .. $row_max ) {
        for my $col ( $col_min .. $col_max ) {

            my $cell = $worksheet->get_cell( $row, $col );
            next unless $cell;

            print "Row, Col    = ($row, $col)\n";
            print "Value       = ", $cell->value(),       "\n";
            print "Unformatted = ", $cell->unformatted(), "\n";
            print "\n";
        }
    }
}

Um eine Excel-Datei mit Perl in Text zu konvertieren, würde ich empfehlen excel2txt das Spreadsheet::ParseExcel verwendet.

7voto

Mr. Muskrat Punkte 21333

Ich hatte großes Glück mit Tabellenkalkulation::ParseExcel .

4voto

Kent Fredric Punkte 55042

Am besten wäre es, wenn Sie ein Modul aus unserem geliebten CPAN .

Wann immer Sie haben ein Problem Ich frage mich sofort: Warum habe ich CPAN noch nicht überprüft?

Welches Modul das ist, kann ich nicht genau sagen, aber hier ist eine Liste, mit der Sie anfangen können, probieren Sie sie aus und sehen Sie, was funktioniert.

1voto

Yordan Georgiev Punkte 4516
@echo off  
ECHO CHECK THE VERSION FROM THE PROJECT
    echo see also this [link][1]
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i
ECHO The _Version is %_Version%

ECHO remove the output html files
del *.html /q
ECHO remove the output log files
del *.log /q

::pause
ECHO %0 > %0.log
ECHO %0.error.log >%0.error.log

set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version%
echo BaseDir is %BaseDir%  1>>%0.log 2>>%0.error.log
ECHO.

set LogLevel=3
echo LogLevel is %LogLevel%  1>>%0.log 2>>%0.error.log
ECHO.

::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls"
SET ExcelFileToParse="%BaseDir%\TODO.xls"
echo ExcelFileToParse is %ExcelFileToParse%  1>>%0.log 2>>%0.error.log
echo.

set OutputDir=%BaseDir%
echo OutputDir is %Outputdir%  1>>%0.log 2>>%0.error.log
echo.

ECHO SET THE UNICODE FOR PERL FOR UTF-8
SET PERL_UNICODE=S
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE%

::set PerlScript=parseExcelToCsv.pl
set PerlScript=ExcelToHtml.pl
echo PerlScript is %PerlScript%  1>>%0.log 2>>%0.error.log
ECHO.

echo Action !!!  1>>%0.log 2>>%0.error.log
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir%  1>>%0.log 2>>%0.error.log
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log

%0.error.log
%0.log

::debug pause

use strict;
use Spreadsheet::ParseExcel;
use utf8 ; 
use Encode ; 

package ExcelToHtml ; 

my $DebugLevel = 3 ; 
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n";
my $minimumAmountOfCommandLineArgs = 3 ; 
my ( $ExcelFileToParse , $OutputDir , $BaseFileName ) = (); 

sub main {
    logMsg("$0 SCRIPT START " , 1 );
    #Action !!! call here methods
    checkCommandLineArgs();     
    buildNames (); 

    ParseExcelAndlogMsgFiles ( $ExcelFileToParse  , $BaseFileName , $OutputDir) ; 

    logMsg("$0 SCRIPT STOP " , 1 );

} #eof MAIN 

sub buildNames 
{

    $ExcelFileToParse = "$ARGV[1]" ; 
    $OutputDir="$ARGV[2]" ;
    $BaseFileName = $ExcelFileToParse ; 
    logMsg( "\$ExcelFileToParse is $ExcelFileToParse" ) ; 
    logMsg ( "\$OutputDir is $OutputDir" ) ; 
    logMsg ("\$BaseFileName is $BaseFileName \n" );
    $BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/;    #strip the directory part 
    logMsg ("\$BaseFileName is $BaseFileName ");
    $BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ;  #strip the file extension
    logMsg ( "\$BaseFileName is $BaseFileName "); 

} #eof sub buildNames

sub ParseExcelAndlogMsgFiles {

my $ExcelFileToParse = shift ; 
my $BaseFileName = shift ; 
my $OutputDir = shift ; 
my $strToReturn = "";

my $parser   = Spreadsheet::ParseExcel->new();
my $workbook = $parser->Parse("$ExcelFileToParse");

for my $worksheet ( $workbook->worksheets() ) {

        my ( $row_min, $row_max ) = $worksheet->row_range();
        my ( $col_min, $col_max ) = $worksheet->col_range();

        for my $row ( $row_min .. $row_max ) {
                my $rowStr = "" ; 
                for my $col ( $col_min .. $col_max ) {

                        my $cell = $worksheet->get_cell( $row, $col );
                        next unless $cell;

                        logMsg ( " Row, Col    = ($row, $col)") ; 
                        logMsg ( "\$cell->encoding() is " . $cell->encoding()) ;

                        logMsg (" Value = ", $cell->value() );
                        $rowStr .=  makeCell($cell->value() )      ;                #The Value

                        logMsg ( "Unformatted = ", $cell->unformatted()) ; 

                } #eof col
            $rowStr = makeRow( $rowStr ); 
            $strToReturn .= $rowStr ; 
     } #eof for my row

        my $worksheetName = $worksheet->{'Name'}  ; 
        my $FileSheet = "$OutputDir/$BaseFileName" .  '.' . $worksheetName . '.' . 'html' ; 
        $strToReturn = makeTable ( $strToReturn );
        $strToReturn = makeFile ( $strToReturn );
        logMsg ( "\$strToReturn  is $strToReturn " ) ; 
        PrintToFile ( $FileSheet, $strToReturn )  ; 
        $strToReturn = "" ;
        $FileSheet = "" ;
        } #eof for my worksheet

} #eof sub

sub trim    
{
        $_[0]=~s/^\s+//;
        $_[0]=~s/\s+$//;
        return $_[0];
}

# =========================================== eof sub trim 
# Action !!!

sub makeRow 
{
    my $row = shift ; 
    return "<tr>" .  $row .  "</tr> \n"  ; 
}

sub makeCell 
{
    my $cell = shift ; 
    return "<td>"  .  "$cell" . " </td>" ; 
}

sub makeTable
{
    my $table = shift ; 
    return "\n <table> " . $table . "</table> \n" ; 

}

sub makeFile
{
    my $file = shift ; 

    $file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ; 
    return $file ; 
}

sub checkCommandLineArgs
{

        if ( @ARGV != $minimumAmountOfCommandLineArgs )
        {
            logErrorMsg ( "Not enougn command line args supplied exit 1" , 1 );
            die "usage: $0 1 args \n" ; 
            exit 1 ; 
        } #eof if
        else 
        {
                foreach (@ARGV) { logMsg ( "$_ " ) ;  }
        } #eof else         if ( @ARGV != 4 )

} #eof sub checkCommandLineArgs

#log a message according to the DebugLevel
sub logMsg 
{
    my $msg = shift ;
    my $importance = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth (); 
    my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" .  '.log' ; 
    $msg = GetANiceTime () .' --- ' .   $msg . " \n" ; 

    if ( $importance == 1 ) 
    {
        $msg = "\n============================================================ \n" . $msg ; 
        $msg = $msg . "============================================================ \n" ;
    }

    #debug nothing
    if ( $DebugLevel == 0 ) {   return ; } 

    #just logMsg the message
    if ( $DebugLevel == 1 ) 
    {
        logMsg ( $msg ); 
    } #eof if ( $DebugLevel == 1 ) 

    #logMsg the message in 
    if ( $DebugLevel == 2 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
        print LOG $msg  ; 
        close LOG;
    }

    #logMsg the message in 
    if ( $DebugLevel == 3 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
        print LOG $msg  ; 
        close LOG;
        print $msg ; 
    } #eof if ( $DebugLevel == 3 ) 

} #eof sub logMsg 

#log a message according to the DebugLevel
sub logErrorMsg 
{
    my $errorMsg = shift ; 
    my $importance  = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth (); 
    my $errorLogFile = "$CurrentPerlScriptName" . "_" .  "$niceMonth" . '.error.log' ; 
    $errorMsg = GetANiceTime () .' --- ' .   $errorMsg . " \n" ; 

        if ( $importance == 1 ) 
        {
            $errorMsg = "\n============================================================ \n" . $errorMsg ; 
            $errorMsg= $errorMsg. "============================================================ \n" ;
        }

    #debug nothing
    if ( $DebugLevel == 0 ) {   return ; } 

    #just logMsg the message
    if ( $DebugLevel == 1 )  {      print $errorMsg ;       } 

    #logMsg the message in a error log file
    if ( $DebugLevel == 2 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
        print ERRLOG $errorMsg ; 
        close ERRLOG;
    }

    #logMsg the message in 
    if ( $DebugLevel == 3 ) 
    {
        #READ ALL ROWS OF A FILE TO ALIST 
        open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
        print ERRLOG $errorMsg  ; 
        close ERRLOG;
        print $errorMsg ; 
    } #eof if ( $DebugLevel == 3 ) 

} #eof sub logErrorMsg 

#GET A NICE TIME 
sub GetANiceTime {
 # Purpose: returns the time in yyyymmdd-format 
 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
 #---- change 'month'- and 'year'-values to correct format ---- 
 $min = "0$min" if ($min < 10); 
 $hour = "0$hour" if ($hour < 10);
 $mon = $mon + 1;
 $mon = "0$mon" if ($mon < 10); 
 $year = $year + 1900;
 $mday = "0$mday" if ($mday < 10); 
 return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec"; 
} #eof sub GetANiceTime

sub GetANiceMonth {
 # Purpose: returns the time in yyyymmdd-format 
 my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
 #---- change 'month'- and 'year'-values to correct format ---- 
 $mon = $mon + 1;
 $mon = "0$mon" if ($mon < 10); 
 $year = $year + 1900;
 return "$year\.$mon" ; 
} #eof sub GetANiceTime

sub PrintToFile {
my $FileOutput = shift ; 
my $StringToPrint = shift ; 
#READ ALL ROWS OF A FILE TO ALIST 
open (FILEOUTPUT, ">$FileOutput") || 
            print "could not open the \$FileOutput $FileOutput!\n"; 
print  FILEOUTPUT $StringToPrint ; 
close FILEOUTPUT ;

#debug $strToReturn .=  $StringToPrint; 

}
# =========================================== eof sub PrintToFile

#Action !!!
main();

1 ; 

__END__

#VersionHistory: 
#1.0. --- Yordan Georgiev --- Initial creation 
#1.1 --- Yordan Georgiev --- Added conditional logging

1voto

DJ-Andrey-sXe Punkte 11

http://search.cpan.org/dist/Spreadsheet-Read/Read.pm

Spreadsheet::Read versucht, transparent zu lesen jede Tabellenkalkulation und gibt deren Inhalt auf universelle Weise zurück, unabhängig von dem Parsing-Modul, das die eigentliche Tabellenkalkulation scannt.

CodeJaeger.com

CodeJaeger ist eine Gemeinschaft für Programmierer, die täglich Hilfe erhalten..
Wir haben viele Inhalte, und Sie können auch Ihre eigenen Fragen stellen oder die Fragen anderer Leute lösen.

Powered by:

X