00001 /****************************************************************************** 00002 * $Id: cpl_csv_cpp-source.html,v 1.6 2001/07/05 13:24:08 warmerda Exp $ 00003 * 00004 * Project: CPL - Common Portability Library 00005 * Purpose: CSV (comma separated value) file access. 00006 * Author: Frank Warmerdam, warmerda@home.com 00007 * 00008 ****************************************************************************** 00009 * Copyright (c) 1999, Frank Warmerdam 00010 * 00011 * Permission is hereby granted, free of charge, to any person obtaining a 00012 * copy of this software and associated documentation files (the "Software"), 00013 * to deal in the Software without restriction, including without limitation 00014 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00015 * and/or sell copies of the Software, and to permit persons to whom the 00016 * Software is furnished to do so, subject to the following conditions: 00017 * 00018 * The above copyright notice and this permission notice shall be included 00019 * in all copies or substantial portions of the Software. 00020 * 00021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00022 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00023 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00024 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00025 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00026 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00027 * DEALINGS IN THE SOFTWARE. 00028 ****************************************************************************** 00029 * 00030 * $Log: cpl_csv_cpp-source.html,v $ 00030 * Revision 1.6 2001/07/05 13:24:08 warmerda 00030 * *** empty log message *** 00030 * 00031 * Revision 1.2 2001/01/19 21:16:41 warmerda 00032 * expanded tabs 00033 * 00034 * Revision 1.1 2000/10/06 15:20:45 warmerda 00035 * New 00036 * 00037 * Revision 1.2 2000/08/29 21:08:08 warmerda 00038 * fallback to use CPLFindFile() 00039 * 00040 * Revision 1.1 2000/04/05 21:55:59 warmerda 00041 * New 00042 * 00043 */ 00044 00045 #include "cpl_csv.h" 00046 #include "cpl_conv.h" 00047 00048 /* ==================================================================== */ 00049 /* The CSVTable is a persistant set of info about an open CSV */ 00050 /* table. While it doesn't currently maintain a record index, */ 00051 /* or in-memory copy of the table, it could be changed to do so */ 00052 /* in the future. */ 00053 /* ==================================================================== */ 00054 typedef struct ctb { 00055 FILE *fp; 00056 00057 struct ctb *psNext; 00058 00059 char *pszFilename; 00060 00061 char **papszFieldNames; 00062 00063 char **papszRecFields; 00064 } CSVTable; 00065 00066 static CSVTable *psCSVTableList = NULL; 00067 00068 /************************************************************************/ 00069 /* CSVAccess() */ 00070 /* */ 00071 /* This function will fetch a handle to the requested table. */ 00072 /* If not found in the ``open table list'' the table will be */ 00073 /* opened and added to the list. Eventually this function may */ 00074 /* become public with an abstracted return type so that */ 00075 /* applications can set options about the table. For now this */ 00076 /* isn't done. */ 00077 /************************************************************************/ 00078 00079 static CSVTable *CSVAccess( const char * pszFilename ) 00080 00081 { 00082 CSVTable *psTable; 00083 FILE *fp; 00084 00085 /* -------------------------------------------------------------------- */ 00086 /* Is the table already in the list. */ 00087 /* -------------------------------------------------------------------- */ 00088 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext ) 00089 { 00090 if( EQUAL(psTable->pszFilename,pszFilename) ) 00091 { 00092 /* 00093 * Eventually we should consider promoting to the front of 00094 * the list to accelerate frequently accessed tables. 00095 */ 00096 00097 return( psTable ); 00098 } 00099 } 00100 00101 /* -------------------------------------------------------------------- */ 00102 /* If not, try to open it. */ 00103 /* -------------------------------------------------------------------- */ 00104 fp = VSIFOpen( pszFilename, "r" ); 00105 if( fp == NULL ) 00106 return NULL; 00107 00108 /* -------------------------------------------------------------------- */ 00109 /* Create an information structure about this table, and add to */ 00110 /* the front of the list. */ 00111 /* -------------------------------------------------------------------- */ 00112 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1); 00113 00114 psTable->fp = fp; 00115 psTable->pszFilename = CPLStrdup( pszFilename ); 00116 psTable->psNext = psCSVTableList; 00117 00118 psCSVTableList = psTable; 00119 00120 /* -------------------------------------------------------------------- */ 00121 /* Read the table header record containing the field names. */ 00122 /* -------------------------------------------------------------------- */ 00123 psTable->papszFieldNames = CSVReadParseLine( fp ); 00124 00125 return( psTable ); 00126 } 00127 00128 /************************************************************************/ 00129 /* CSVDeaccess() */ 00130 /************************************************************************/ 00131 00132 void CSVDeaccess( const char * pszFilename ) 00133 00134 { 00135 CSVTable *psLast, *psTable; 00136 00137 /* -------------------------------------------------------------------- */ 00138 /* A NULL means deaccess all tables. */ 00139 /* -------------------------------------------------------------------- */ 00140 if( pszFilename == NULL ) 00141 { 00142 while( psCSVTableList != NULL ) 00143 CSVDeaccess( psCSVTableList->pszFilename ); 00144 00145 return; 00146 } 00147 00148 /* -------------------------------------------------------------------- */ 00149 /* Find this table. */ 00150 /* -------------------------------------------------------------------- */ 00151 psLast = NULL; 00152 for( psTable = psCSVTableList; 00153 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename); 00154 psTable = psTable->psNext ) 00155 { 00156 psLast = psTable; 00157 } 00158 00159 if( psTable == NULL ) 00160 return; 00161 00162 /* -------------------------------------------------------------------- */ 00163 /* Remove the link from the list. */ 00164 /* -------------------------------------------------------------------- */ 00165 if( psLast != NULL ) 00166 psLast->psNext = psTable->psNext; 00167 else 00168 psCSVTableList = psTable->psNext; 00169 00170 /* -------------------------------------------------------------------- */ 00171 /* Free the table. */ 00172 /* -------------------------------------------------------------------- */ 00173 VSIFClose( psTable->fp ); 00174 00175 CSLDestroy( psTable->papszFieldNames ); 00176 CSLDestroy( psTable->papszRecFields ); 00177 CPLFree( psTable->pszFilename ); 00178 00179 CPLFree( psTable ); 00180 } 00181 00182 /************************************************************************/ 00183 /* CSVReadParseLine() */ 00184 /* */ 00185 /* Read one line, and return split into fields. The return */ 00186 /* result is a stringlist, in the sense of the CSL functions. */ 00187 /************************************************************************/ 00188 00189 char **CSVReadParseLine( FILE * fp ) 00190 00191 { 00192 const char *pszLine; 00193 char *pszWorkLine; 00194 char **papszReturn; 00195 00196 CPLAssert( fp != NULL ); 00197 if( fp == NULL ) 00198 return( NULL ); 00199 00200 pszLine = CPLReadLine( fp ); 00201 if( pszLine == NULL ) 00202 return( NULL ); 00203 00204 /* -------------------------------------------------------------------- */ 00205 /* If there are no quotes, then this is the simple case. */ 00206 /* Parse, and return tokens. */ 00207 /* -------------------------------------------------------------------- */ 00208 if( strchr(pszLine,'\"') == NULL ) 00209 return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE ); 00210 00211 /* -------------------------------------------------------------------- */ 00212 /* We must now count the quotes in our working string, and as */ 00213 /* long as it is odd, keep adding new lines. */ 00214 /* -------------------------------------------------------------------- */ 00215 pszWorkLine = CPLStrdup( pszLine ); 00216 00217 while( TRUE ) 00218 { 00219 int i, nCount = 0; 00220 00221 for( i = 0; pszWorkLine[i] != '\0'; i++ ) 00222 { 00223 if( pszWorkLine[i] == '\"' 00224 && (i == 0 || pszWorkLine[i-1] != '\\') ) 00225 nCount++; 00226 } 00227 00228 if( nCount % 2 == 0 ) 00229 break; 00230 00231 pszLine = CPLReadLine( fp ); 00232 if( pszLine == NULL ) 00233 break; 00234 00235 pszWorkLine = (char *) 00236 CPLRealloc(pszWorkLine, 00237 strlen(pszWorkLine) + strlen(pszLine) + 1); 00238 strcat( pszWorkLine, pszLine ); 00239 } 00240 00241 papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE ); 00242 00243 CPLFree( pszWorkLine ); 00244 00245 return papszReturn; 00246 } 00247 00248 /************************************************************************/ 00249 /* CSVCompare() */ 00250 /* */ 00251 /* Compare a field to a search value using a particular */ 00252 /* criteria. */ 00253 /************************************************************************/ 00254 00255 static int CSVCompare( const char * pszFieldValue, const char * pszTarget, 00256 CSVCompareCriteria eCriteria ) 00257 00258 { 00259 if( eCriteria == CC_ExactString ) 00260 { 00261 return( strcmp( pszFieldValue, pszTarget ) == 0 ); 00262 } 00263 else if( eCriteria == CC_ApproxString ) 00264 { 00265 return( EQUAL( pszFieldValue, pszTarget ) ); 00266 } 00267 else if( eCriteria == CC_Integer ) 00268 { 00269 return( atoi(pszFieldValue) == atoi(pszTarget) ); 00270 } 00271 00272 return FALSE; 00273 } 00274 00275 /************************************************************************/ 00276 /* CSVScanLines() */ 00277 /* */ 00278 /* Read the file scanline for lines where the key field equals */ 00279 /* the indicated value with the suggested comparison criteria. */ 00280 /* Return the first matching line split into fields. */ 00281 /************************************************************************/ 00282 00283 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue, 00284 CSVCompareCriteria eCriteria ) 00285 00286 { 00287 char **papszFields = NULL; 00288 int bSelected = FALSE, nTestValue; 00289 00290 CPLAssert( pszValue != NULL ); 00291 CPLAssert( iKeyField >= 0 ); 00292 CPLAssert( fp != NULL ); 00293 00294 nTestValue = atoi(pszValue); 00295 00296 while( !bSelected ) { 00297 papszFields = CSVReadParseLine( fp ); 00298 if( papszFields == NULL ) 00299 return( NULL ); 00300 00301 if( CSLCount( papszFields ) < iKeyField+1 ) 00302 { 00303 /* not selected */ 00304 } 00305 else if( eCriteria == CC_Integer 00306 && atoi(papszFields[iKeyField]) == nTestValue ) 00307 { 00308 bSelected = TRUE; 00309 } 00310 else 00311 { 00312 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00313 eCriteria ); 00314 } 00315 00316 if( !bSelected ) 00317 { 00318 CSLDestroy( papszFields ); 00319 papszFields = NULL; 00320 } 00321 } 00322 00323 return( papszFields ); 00324 } 00325 00326 /************************************************************************/ 00327 /* CSVScanFile() */ 00328 /* */ 00329 /* Scan a whole file using criteria similar to above, but also */ 00330 /* taking care of file opening and closing. */ 00331 /************************************************************************/ 00332 00333 char **CSVScanFile( const char * pszFilename, int iKeyField, 00334 const char * pszValue, CSVCompareCriteria eCriteria ) 00335 00336 { 00337 CSVTable *psTable; 00338 00339 /* -------------------------------------------------------------------- */ 00340 /* Get access to the table. */ 00341 /* -------------------------------------------------------------------- */ 00342 CPLAssert( pszFilename != NULL ); 00343 00344 if( iKeyField < 0 ) 00345 return NULL; 00346 00347 psTable = CSVAccess( pszFilename ); 00348 if( psTable == NULL ) 00349 return NULL; 00350 00351 /* -------------------------------------------------------------------- */ 00352 /* Does the current record match the criteria? If so, just */ 00353 /* return it again. */ 00354 /* -------------------------------------------------------------------- */ 00355 if( iKeyField >= 0 00356 && iKeyField < CSLCount(psTable->papszRecFields) 00357 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) ) 00358 { 00359 return psTable->papszRecFields; 00360 } 00361 00362 /* -------------------------------------------------------------------- */ 00363 /* Scan the file from the beginning, replacing the ``current */ 00364 /* record'' in our structure with the one that is found. */ 00365 /* -------------------------------------------------------------------- */ 00366 VSIRewind( psTable->fp ); 00367 CPLReadLine( psTable->fp ); /* throw away the header line */ 00368 00369 CSLDestroy( psTable->papszRecFields ); 00370 psTable->papszRecFields = 00371 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria ); 00372 00373 return( psTable->papszRecFields ); 00374 } 00375 00376 /************************************************************************/ 00377 /* CPLGetFieldId() */ 00378 /* */ 00379 /* Read the first record of a CSV file (rewinding to be sure), */ 00380 /* and find the field with the indicated name. Returns -1 if */ 00381 /* it fails to find the field name. Comparison is case */ 00382 /* insensitive, but otherwise exact. After this function has */ 00383 /* been called the file pointer will be positioned just after */ 00384 /* the first record. */ 00385 /************************************************************************/ 00386 00387 int CSVGetFieldId( FILE * fp, const char * pszFieldName ) 00388 00389 { 00390 char **papszFields; 00391 int i; 00392 00393 CPLAssert( fp != NULL && pszFieldName != NULL ); 00394 00395 VSIRewind( fp ); 00396 00397 papszFields = CSVReadParseLine( fp ); 00398 for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ ) 00399 { 00400 if( EQUAL(papszFields[i],pszFieldName) ) 00401 { 00402 CSLDestroy( papszFields ); 00403 return i; 00404 } 00405 } 00406 00407 CSLDestroy( papszFields ); 00408 00409 return -1; 00410 } 00411 00412 /************************************************************************/ 00413 /* CSVGetFileFieldId() */ 00414 /* */ 00415 /* Same as CPLGetFieldId(), except that we get the file based */ 00416 /* on filename, rather than having an existing handle. */ 00417 /************************************************************************/ 00418 00419 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName ) 00420 00421 { 00422 CSVTable *psTable; 00423 int i; 00424 00425 /* -------------------------------------------------------------------- */ 00426 /* Get access to the table. */ 00427 /* -------------------------------------------------------------------- */ 00428 CPLAssert( pszFilename != NULL ); 00429 00430 psTable = CSVAccess( pszFilename ); 00431 if( psTable == NULL ) 00432 return -1; 00433 00434 /* -------------------------------------------------------------------- */ 00435 /* Find the requested field. */ 00436 /* -------------------------------------------------------------------- */ 00437 for( i = 0; 00438 psTable->papszFieldNames != NULL 00439 && psTable->papszFieldNames[i] != NULL; 00440 i++ ) 00441 { 00442 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) ) 00443 { 00444 return i; 00445 } 00446 } 00447 00448 return -1; 00449 } 00450 00451 00452 /************************************************************************/ 00453 /* CSVScanFileByName() */ 00454 /* */ 00455 /* Same as CSVScanFile(), but using a field name instead of a */ 00456 /* field number. */ 00457 /************************************************************************/ 00458 00459 char **CSVScanFileByName( const char * pszFilename, 00460 const char * pszKeyFieldName, 00461 const char * pszValue, CSVCompareCriteria eCriteria ) 00462 00463 { 00464 int iKeyField; 00465 00466 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName ); 00467 if( iKeyField == -1 ) 00468 return NULL; 00469 00470 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) ); 00471 } 00472 00473 /************************************************************************/ 00474 /* CSVGetField() */ 00475 /* */ 00476 /* The all-in-one function to fetch a particular field value */ 00477 /* from a CSV file. Note this function will return an empty */ 00478 /* string, rather than NULL if it fails to find the desired */ 00479 /* value for some reason. The caller can't establish that the */ 00480 /* fetch failed. */ 00481 /************************************************************************/ 00482 00483 const char *CSVGetField( const char * pszFilename, 00484 const char * pszKeyFieldName, 00485 const char * pszKeyFieldValue, 00486 CSVCompareCriteria eCriteria, 00487 const char * pszTargetField ) 00488 00489 { 00490 CSVTable *psTable; 00491 char **papszRecord; 00492 int iTargetField; 00493 00494 /* -------------------------------------------------------------------- */ 00495 /* Find the table. */ 00496 /* -------------------------------------------------------------------- */ 00497 psTable = CSVAccess( pszFilename ); 00498 if( psTable == NULL ) 00499 return ""; 00500 00501 /* -------------------------------------------------------------------- */ 00502 /* Find the correct record. */ 00503 /* -------------------------------------------------------------------- */ 00504 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName, 00505 pszKeyFieldValue, eCriteria ); 00506 00507 if( papszRecord == NULL ) 00508 return ""; 00509 00510 /* -------------------------------------------------------------------- */ 00511 /* Figure out which field we want out of this. */ 00512 /* -------------------------------------------------------------------- */ 00513 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField ); 00514 if( iTargetField < 0 ) 00515 return ""; 00516 00517 if( iTargetField >= CSLCount( papszRecord ) ) 00518 return ""; 00519 00520 return( papszRecord[iTargetField] ); 00521 } 00522 00523 /************************************************************************/ 00524 /* CSVFilename() */ 00525 /* */ 00526 /* Return the full path to a particular CSV file. This will */ 00527 /* eventually be something the application can override. */ 00528 /************************************************************************/ 00529 00530 static const char *(*pfnCSVFilenameHook)(const char *) = NULL; 00531 00532 const char * CSVFilename( const char *pszBasename ) 00533 00534 { 00535 static char szPath[512]; 00536 00537 if( pfnCSVFilenameHook == NULL ) 00538 { 00539 FILE *fp = NULL; 00540 const char *pszResult = CPLFindFile( "epsg_csv", pszBasename ); 00541 00542 if( pszResult != NULL ) 00543 return pszResult; 00544 00545 if( getenv("GEOTIFF_CSV") != NULL ) 00546 { 00547 sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename ); 00548 } 00549 else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL ) 00550 { 00551 sprintf( szPath, "csv/%s", pszBasename ); 00552 } 00553 else 00554 { 00555 sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename ); 00556 } 00557 00558 if( fp != NULL ) 00559 fclose( fp ); 00560 00561 return( szPath ); 00562 } 00563 else 00564 return( pfnCSVFilenameHook( pszBasename ) ); 00565 } 00566 00567 /************************************************************************/ 00568 /* SetCSVFilenameHook() */ 00569 /* */ 00570 /* Applications can use this to set a function that will */ 00571 /* massage CSV filenames. */ 00572 /************************************************************************/ 00573 00618 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) ) 00619 00620 { 00621 pfnCSVFilenameHook = pfnNewHook; 00622 }