Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members   Related Pages  

cpl_csv.cpp

00001 /******************************************************************************
00002  * $Id: cpl_csv_cpp-source.html,v 1.6 2001/07/05 13:24:08 warmerda Exp $
00003  *
00004  * Project:  CPL - Common Portability Library
00005  * Purpose:  CSV (comma separated value) file access.
00006  * Author:   Frank Warmerdam, warmerda@home.com
00007  *
00008  ******************************************************************************
00009  * Copyright (c) 1999, Frank Warmerdam
00010  *
00011  * Permission is hereby granted, free of charge, to any person obtaining a
00012  * copy of this software and associated documentation files (the "Software"),
00013  * to deal in the Software without restriction, including without limitation
00014  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00015  * and/or sell copies of the Software, and to permit persons to whom the
00016  * Software is furnished to do so, subject to the following conditions:
00017  *
00018  * The above copyright notice and this permission notice shall be included
00019  * in all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00022  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00024  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00027  * DEALINGS IN THE SOFTWARE.
00028  ******************************************************************************
00029  *
00030  * $Log: cpl_csv_cpp-source.html,v $
00030  * Revision 1.6  2001/07/05 13:24:08  warmerda
00030  * *** empty log message ***
00030  *
00031  * Revision 1.2  2001/01/19 21:16:41  warmerda
00032  * expanded tabs
00033  *
00034  * Revision 1.1  2000/10/06 15:20:45  warmerda
00035  * New
00036  *
00037  * Revision 1.2  2000/08/29 21:08:08  warmerda
00038  * fallback to use CPLFindFile()
00039  *
00040  * Revision 1.1  2000/04/05 21:55:59  warmerda
00041  * New
00042  *
00043  */
00044 
00045 #include "cpl_csv.h"
00046 #include "cpl_conv.h"
00047 
/* ==================================================================== */
/*      CSVTable holds the persistent state for one open CSV file.      */
/*      No record index or in-memory copy of the rows is kept at        */
/*      present, though the structure could be extended to hold         */
/*      one in the future.                                              */
/* ==================================================================== */
typedef struct ctb {
    FILE        *fp;                /* open handle on the CSV file */
    struct ctb  *psNext;            /* next entry in the open table list */
    char        *pszFilename;       /* private copy of the name it was opened with */
    char        **papszFieldNames;  /* header record split into fields */
    char        **papszRecFields;   /* last record matched by a scan, or NULL */
} CSVTable;

/* Head of the singly linked list of currently open tables. */
static CSVTable *psCSVTableList = NULL;
00067 
00068 /************************************************************************/
00069 /*                             CSVAccess()                              */
00070 /*                                                                      */
00071 /*      This function will fetch a handle to the requested table.       */
00072 /*      If not found in the ``open table list'' the table will be       */
00073 /*      opened and added to the list.  Eventually this function may     */
00074 /*      become public with an abstracted return type so that            */
00075 /*      applications can set options about the table.  For now this     */
00076 /*      isn't done.                                                     */
00077 /************************************************************************/
00078 
00079 static CSVTable *CSVAccess( const char * pszFilename )
00080 
00081 {
00082     CSVTable    *psTable;
00083     FILE        *fp;
00084 
00085 /* -------------------------------------------------------------------- */
00086 /*      Is the table already in the list.                               */
00087 /* -------------------------------------------------------------------- */
00088     for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00089     {
00090         if( EQUAL(psTable->pszFilename,pszFilename) )
00091         {
00092             /*
00093              * Eventually we should consider promoting to the front of
00094              * the list to accelerate frequently accessed tables.
00095              */
00096             
00097             return( psTable );
00098         }
00099     }
00100 
00101 /* -------------------------------------------------------------------- */
00102 /*      If not, try to open it.                                         */
00103 /* -------------------------------------------------------------------- */
00104     fp = VSIFOpen( pszFilename, "r" );
00105     if( fp == NULL )
00106         return NULL;
00107 
00108 /* -------------------------------------------------------------------- */
00109 /*      Create an information structure about this table, and add to    */
00110 /*      the front of the list.                                          */
00111 /* -------------------------------------------------------------------- */
00112     psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00113 
00114     psTable->fp = fp;
00115     psTable->pszFilename = CPLStrdup( pszFilename );
00116     psTable->psNext = psCSVTableList;
00117     
00118     psCSVTableList = psTable;
00119 
00120 /* -------------------------------------------------------------------- */
00121 /*      Read the table header record containing the field names.        */
00122 /* -------------------------------------------------------------------- */
00123     psTable->papszFieldNames = CSVReadParseLine( fp );
00124 
00125     return( psTable );
00126 }
00127 
00128 /************************************************************************/
00129 /*                            CSVDeaccess()                             */
00130 /************************************************************************/
00131 
00132 void CSVDeaccess( const char * pszFilename )
00133 
00134 {
00135     CSVTable    *psLast, *psTable;
00136     
00137 /* -------------------------------------------------------------------- */
00138 /*      A NULL means deaccess all tables.                               */
00139 /* -------------------------------------------------------------------- */
00140     if( pszFilename == NULL )
00141     {
00142         while( psCSVTableList != NULL )
00143             CSVDeaccess( psCSVTableList->pszFilename );
00144         
00145         return;
00146     }
00147 
00148 /* -------------------------------------------------------------------- */
00149 /*      Find this table.                                                */
00150 /* -------------------------------------------------------------------- */
00151     psLast = NULL;
00152     for( psTable = psCSVTableList;
00153          psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00154          psTable = psTable->psNext )
00155     {
00156         psLast = psTable;
00157     }
00158 
00159     if( psTable == NULL )
00160         return;
00161 
00162 /* -------------------------------------------------------------------- */
00163 /*      Remove the link from the list.                                  */
00164 /* -------------------------------------------------------------------- */
00165     if( psLast != NULL )
00166         psLast->psNext = psTable->psNext;
00167     else
00168         psCSVTableList = psTable->psNext;
00169 
00170 /* -------------------------------------------------------------------- */
00171 /*      Free the table.                                                 */
00172 /* -------------------------------------------------------------------- */
00173     VSIFClose( psTable->fp );
00174 
00175     CSLDestroy( psTable->papszFieldNames );
00176     CSLDestroy( psTable->papszRecFields );
00177     CPLFree( psTable->pszFilename );
00178 
00179     CPLFree( psTable );
00180 }
00181 
00182 /************************************************************************/
00183 /*                          CSVReadParseLine()                          */
00184 /*                                                                      */
00185 /*      Read one line, and return split into fields.  The return        */
00186 /*      result is a stringlist, in the sense of the CSL functions.      */
00187 /************************************************************************/
00188 
00189 char **CSVReadParseLine( FILE * fp )
00190 
00191 {
00192     const char  *pszLine;
00193     char        *pszWorkLine;
00194     char        **papszReturn;
00195 
00196     CPLAssert( fp != NULL );
00197     if( fp == NULL )
00198         return( NULL );
00199     
00200     pszLine = CPLReadLine( fp );
00201     if( pszLine == NULL )
00202         return( NULL );
00203 
00204 /* -------------------------------------------------------------------- */
00205 /*      If there are no quotes, then this is the simple case.           */
00206 /*      Parse, and return tokens.                                       */
00207 /* -------------------------------------------------------------------- */
00208     if( strchr(pszLine,'\"') == NULL )
00209         return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE );
00210 
00211 /* -------------------------------------------------------------------- */
00212 /*      We must now count the quotes in our working string, and as      */
00213 /*      long as it is odd, keep adding new lines.                       */
00214 /* -------------------------------------------------------------------- */
00215     pszWorkLine = CPLStrdup( pszLine );
00216 
00217     while( TRUE )
00218     {
00219         int             i, nCount = 0;
00220 
00221         for( i = 0; pszWorkLine[i] != '\0'; i++ )
00222         {
00223             if( pszWorkLine[i] == '\"'
00224                 && (i == 0 || pszWorkLine[i-1] != '\\') )
00225                 nCount++;
00226         }
00227 
00228         if( nCount % 2 == 0 )
00229             break;
00230 
00231         pszLine = CPLReadLine( fp );
00232         if( pszLine == NULL )
00233             break;
00234 
00235         pszWorkLine = (char *)
00236             CPLRealloc(pszWorkLine,
00237                        strlen(pszWorkLine) + strlen(pszLine) + 1);
00238         strcat( pszWorkLine, pszLine );
00239     }
00240     
00241     papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE );
00242 
00243     CPLFree( pszWorkLine );
00244 
00245     return papszReturn;
00246 }
00247 
00248 /************************************************************************/
00249 /*                             CSVCompare()                             */
00250 /*                                                                      */
00251 /*      Compare a field to a search value using a particular            */
00252 /*      criteria.                                                       */
00253 /************************************************************************/
00254 
00255 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00256                        CSVCompareCriteria eCriteria )
00257 
00258 {
00259     if( eCriteria == CC_ExactString )
00260     {
00261         return( strcmp( pszFieldValue, pszTarget ) == 0 );
00262     }
00263     else if( eCriteria == CC_ApproxString )
00264     {
00265         return( EQUAL( pszFieldValue, pszTarget ) );
00266     }
00267     else if( eCriteria == CC_Integer )
00268     {
00269         return( atoi(pszFieldValue) == atoi(pszTarget) );
00270     }
00271 
00272     return FALSE;
00273 }
00274 
00275 /************************************************************************/
00276 /*                            CSVScanLines()                            */
00277 /*                                                                      */
00278 /*      Read the file scanline for lines where the key field equals     */
00279 /*      the indicated value with the suggested comparison criteria.     */
00280 /*      Return the first matching line split into fields.               */
00281 /************************************************************************/
00282 
00283 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00284                      CSVCompareCriteria eCriteria )
00285 
00286 {
00287     char        **papszFields = NULL;
00288     int         bSelected = FALSE, nTestValue;
00289 
00290     CPLAssert( pszValue != NULL );
00291     CPLAssert( iKeyField >= 0 );
00292     CPLAssert( fp != NULL );
00293     
00294     nTestValue = atoi(pszValue);
00295     
00296     while( !bSelected ) {
00297         papszFields = CSVReadParseLine( fp );
00298         if( papszFields == NULL )
00299             return( NULL );
00300 
00301         if( CSLCount( papszFields ) < iKeyField+1 )
00302         {
00303             /* not selected */
00304         }
00305         else if( eCriteria == CC_Integer
00306                  && atoi(papszFields[iKeyField]) == nTestValue )
00307         {
00308             bSelected = TRUE;
00309         }
00310         else
00311         {
00312             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00313                                     eCriteria );
00314         }
00315 
00316         if( !bSelected )
00317         {
00318             CSLDestroy( papszFields );
00319             papszFields = NULL;
00320         }
00321     }
00322     
00323     return( papszFields );
00324 }
00325 
00326 /************************************************************************/
00327 /*                            CSVScanFile()                             */
00328 /*                                                                      */
00329 /*      Scan a whole file using criteria similar to above, but also     */
00330 /*      taking care of file opening and closing.                        */
00331 /************************************************************************/
00332 
00333 char **CSVScanFile( const char * pszFilename, int iKeyField,
00334                     const char * pszValue, CSVCompareCriteria eCriteria )
00335 
00336 {
00337     CSVTable    *psTable;
00338 
00339 /* -------------------------------------------------------------------- */
00340 /*      Get access to the table.                                        */
00341 /* -------------------------------------------------------------------- */
00342     CPLAssert( pszFilename != NULL );
00343 
00344     if( iKeyField < 0 )
00345         return NULL;
00346 
00347     psTable = CSVAccess( pszFilename );
00348     if( psTable == NULL )
00349         return NULL;
00350 
00351 /* -------------------------------------------------------------------- */
00352 /*      Does the current record match the criteria?  If so, just        */
00353 /*      return it again.                                                */
00354 /* -------------------------------------------------------------------- */
00355     if( iKeyField >= 0
00356         && iKeyField < CSLCount(psTable->papszRecFields)
00357         && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00358     {
00359         return psTable->papszRecFields;
00360     }
00361 
00362 /* -------------------------------------------------------------------- */
00363 /*      Scan the file from the beginning, replacing the ``current       */
00364 /*      record'' in our structure with the one that is found.           */
00365 /* -------------------------------------------------------------------- */
00366     VSIRewind( psTable->fp );
00367     CPLReadLine( psTable->fp );         /* throw away the header line */
00368     
00369     CSLDestroy( psTable->papszRecFields );
00370     psTable->papszRecFields =
00371         CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00372 
00373     return( psTable->papszRecFields );
00374 }
00375 
00376 /************************************************************************/
00377 /*                           CPLGetFieldId()                            */
00378 /*                                                                      */
00379 /*      Read the first record of a CSV file (rewinding to be sure),     */
00380 /*      and find the field with the indicated name.  Returns -1 if      */
00381 /*      it fails to find the field name.  Comparison is case            */
00382 /*      insensitive, but otherwise exact.  After this function has      */
00383 /*      been called the file pointer will be positioned just after      */
00384 /*      the first record.                                               */
00385 /************************************************************************/
00386 
00387 int CSVGetFieldId( FILE * fp, const char * pszFieldName )
00388 
00389 {
00390     char        **papszFields;
00391     int         i;
00392     
00393     CPLAssert( fp != NULL && pszFieldName != NULL );
00394 
00395     VSIRewind( fp );
00396 
00397     papszFields = CSVReadParseLine( fp );
00398     for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ )
00399     {
00400         if( EQUAL(papszFields[i],pszFieldName) )
00401         {
00402             CSLDestroy( papszFields );
00403             return i;
00404         }
00405     }
00406 
00407     CSLDestroy( papszFields );
00408 
00409     return -1;
00410 }
00411 
00412 /************************************************************************/
00413 /*                         CSVGetFileFieldId()                          */
00414 /*                                                                      */
00415 /*      Same as CPLGetFieldId(), except that we get the file based      */
00416 /*      on filename, rather than having an existing handle.             */
00417 /************************************************************************/
00418 
00419 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00420 
00421 {
00422     CSVTable    *psTable;
00423     int         i;
00424     
00425 /* -------------------------------------------------------------------- */
00426 /*      Get access to the table.                                        */
00427 /* -------------------------------------------------------------------- */
00428     CPLAssert( pszFilename != NULL );
00429 
00430     psTable = CSVAccess( pszFilename );
00431     if( psTable == NULL )
00432         return -1;
00433 
00434 /* -------------------------------------------------------------------- */
00435 /*      Find the requested field.                                       */
00436 /* -------------------------------------------------------------------- */
00437     for( i = 0;
00438          psTable->papszFieldNames != NULL
00439              && psTable->papszFieldNames[i] != NULL;
00440          i++ )
00441     {
00442         if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00443         {
00444             return i;
00445         }
00446     }
00447 
00448     return -1;
00449 }
00450 
00451 
00452 /************************************************************************/
00453 /*                         CSVScanFileByName()                          */
00454 /*                                                                      */
00455 /*      Same as CSVScanFile(), but using a field name instead of a      */
00456 /*      field number.                                                   */
00457 /************************************************************************/
00458 
00459 char **CSVScanFileByName( const char * pszFilename,
00460                           const char * pszKeyFieldName,
00461                           const char * pszValue, CSVCompareCriteria eCriteria )
00462 
00463 {
00464     int         iKeyField;
00465 
00466     iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00467     if( iKeyField == -1 )
00468         return NULL;
00469 
00470     return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00471 }
00472 
00473 /************************************************************************/
00474 /*                            CSVGetField()                             */
00475 /*                                                                      */
00476 /*      The all-in-one function to fetch a particular field value       */
00477 /*      from a CSV file.  Note this function will return an empty       */
00478 /*      string, rather than NULL if it fails to find the desired        */
00479 /*      value for some reason.  The caller can't establish that the     */
00480 /*      fetch failed.                                                   */
00481 /************************************************************************/
00482 
00483 const char *CSVGetField( const char * pszFilename,
00484                          const char * pszKeyFieldName,
00485                          const char * pszKeyFieldValue,
00486                          CSVCompareCriteria eCriteria,
00487                          const char * pszTargetField )
00488 
00489 {
00490     CSVTable    *psTable;
00491     char        **papszRecord;
00492     int         iTargetField;
00493     
00494 /* -------------------------------------------------------------------- */
00495 /*      Find the table.                                                 */
00496 /* -------------------------------------------------------------------- */
00497     psTable = CSVAccess( pszFilename );
00498     if( psTable == NULL )
00499         return "";
00500 
00501 /* -------------------------------------------------------------------- */
00502 /*      Find the correct record.                                        */
00503 /* -------------------------------------------------------------------- */
00504     papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00505                                      pszKeyFieldValue, eCriteria );
00506 
00507     if( papszRecord == NULL )
00508         return "";
00509 
00510 /* -------------------------------------------------------------------- */
00511 /*      Figure out which field we want out of this.                     */
00512 /* -------------------------------------------------------------------- */
00513     iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00514     if( iTargetField < 0 )
00515         return "";
00516 
00517     if( iTargetField >= CSLCount( papszRecord ) )
00518         return "";
00519 
00520     return( papszRecord[iTargetField] );
00521 }
00522 
00523 /************************************************************************/
00524 /*                            CSVFilename()                             */
00525 /*                                                                      */
00526 /*      Return the full path to a particular CSV file.  This will       */
00527 /*      eventually be something the application can override.           */
00528 /************************************************************************/
00529 
00530 static const char *(*pfnCSVFilenameHook)(const char *) = NULL;
00531 
00532 const char * CSVFilename( const char *pszBasename )
00533 
00534 {
00535     static char         szPath[512];
00536 
00537     if( pfnCSVFilenameHook == NULL )
00538     {
00539         FILE    *fp = NULL;
00540         const char *pszResult = CPLFindFile( "epsg_csv", pszBasename );
00541 
00542         if( pszResult != NULL )
00543             return pszResult;
00544 
00545         if( getenv("GEOTIFF_CSV") != NULL )
00546         {
00547             sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename );
00548         }
00549         else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
00550         {
00551             sprintf( szPath, "csv/%s", pszBasename );
00552         }
00553         else
00554         {
00555             sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename );
00556         }
00557 
00558         if( fp != NULL )
00559             fclose( fp );
00560         
00561         return( szPath );
00562     }
00563     else
00564         return( pfnCSVFilenameHook( pszBasename ) );
00565 }
00566 
00567 /************************************************************************/
00568 /*                         SetCSVFilenameHook()                         */
00569 /*                                                                      */
00570 /*      Applications can use this to set a function that will           */
00571 /*      massage CSV filenames.                                          */
00572 /************************************************************************/
00573 
00618 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00619 
00620 {
00621     pfnCSVFilenameHook = pfnNewHook;
00622 }

Generated at Thu Jul 5 09:16:11 2001 for GDAL by doxygen1.2.3-20001105 written by Dimitri van Heesch, © 1997-2000