Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members   Related Pages  

cpl_csv.c

00001 /******************************************************************************
00002  * $Id: cpl_csv_c-source.html,v 1.1 2000/09/25 20:50:11 warmerda Exp $
00003  *
00004  * Project:  CPL - Common Portability Library
00005  * Purpose:  CSV (comma separated value) file access.
00006  * Author:   Frank Warmerdam, warmerda@home.com
00007  *
00008  ******************************************************************************
00009  * Copyright (c) 1999, Frank Warmerdam
00010  *
00011  * Permission is hereby granted, free of charge, to any person obtaining a
00012  * copy of this software and associated documentation files (the "Software"),
00013  * to deal in the Software without restriction, including without limitation
00014  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00015  * and/or sell copies of the Software, and to permit persons to whom the
00016  * Software is furnished to do so, subject to the following conditions:
00017  *
00018  * The above copyright notice and this permission notice shall be included
00019  * in all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00022  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00024  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00027  * DEALINGS IN THE SOFTWARE.
00028  ******************************************************************************
00029  *
00030  * $Log: cpl_csv_c-source.html,v $
00030  * Revision 1.1  2000/09/25 20:50:11  warmerda
00030  * New
00030  *
00031  * Revision 1.2  2000/08/29 21:08:08  warmerda
00032  * fallback to use CPLFindFile()
00033  *
00034  * Revision 1.1  2000/04/05 21:55:59  warmerda
00035  * New
00036  *
00037  */
00038 
00039 #include "cpl_csv.h"
00040 #include "cpl_conv.h"
00041 
/* ==================================================================== */
/*      A CSVTable holds the persistent state kept for one open CSV     */
/*      file.  No record index or in-memory copy of the table is        */
/*      kept at present, though either could be added in future.        */
/* ==================================================================== */
typedef struct ctb {
    FILE        *fp;                /* open handle on the CSV file */

    struct ctb  *psNext;            /* next entry in the open table list */

    char        *pszFilename;       /* name the table was opened under */

    char        **papszFieldNames;  /* fields parsed from the header line */

    char        **papszRecFields;   /* record found by the latest scan */
} CSVTable;

/* Head of the singly linked list of currently open tables. */
static CSVTable *psCSVTableList = NULL;
00061 
00062 /************************************************************************/
00063 /*                             CSVAccess()                              */
00064 /*                                                                      */
00065 /*      This function will fetch a handle to the requested table.       */
00066 /*      If not found in the ``open table list'' the table will be       */
00067 /*      opened and added to the list.  Eventually this function may     */
00068 /*      become public with an abstracted return type so that            */
00069 /*      applications can set options about the table.  For now this     */
00070 /*      isn't done.                                                     */
00071 /************************************************************************/
00072 
00073 static CSVTable *CSVAccess( const char * pszFilename )
00074 
00075 {
00076     CSVTable    *psTable;
00077     FILE        *fp;
00078 
00079 /* -------------------------------------------------------------------- */
00080 /*      Is the table already in the list.                               */
00081 /* -------------------------------------------------------------------- */
00082     for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00083     {
00084         if( EQUAL(psTable->pszFilename,pszFilename) )
00085         {
00086             /*
00087              * Eventually we should consider promoting to the front of
00088              * the list to accelerate frequently accessed tables.
00089              */
00090             
00091             return( psTable );
00092         }
00093     }
00094 
00095 /* -------------------------------------------------------------------- */
00096 /*      If not, try to open it.                                         */
00097 /* -------------------------------------------------------------------- */
00098     fp = VSIFOpen( pszFilename, "r" );
00099     if( fp == NULL )
00100         return NULL;
00101 
00102 /* -------------------------------------------------------------------- */
00103 /*      Create an information structure about this table, and add to    */
00104 /*      the front of the list.                                          */
00105 /* -------------------------------------------------------------------- */
00106     psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00107 
00108     psTable->fp = fp;
00109     psTable->pszFilename = CPLStrdup( pszFilename );
00110     psTable->psNext = psCSVTableList;
00111     
00112     psCSVTableList = psTable;
00113 
00114 /* -------------------------------------------------------------------- */
00115 /*      Read the table header record containing the field names.        */
00116 /* -------------------------------------------------------------------- */
00117     psTable->papszFieldNames = CSVReadParseLine( fp );
00118 
00119     return( psTable );
00120 }
00121 
00122 /************************************************************************/
00123 /*                            CSVDeaccess()                             */
00124 /************************************************************************/
00125 
00126 void CSVDeaccess( const char * pszFilename )
00127 
00128 {
00129     CSVTable    *psLast, *psTable;
00130     
00131 /* -------------------------------------------------------------------- */
00132 /*      A NULL means deaccess all tables.                               */
00133 /* -------------------------------------------------------------------- */
00134     if( pszFilename == NULL )
00135     {
00136         while( psCSVTableList != NULL )
00137             CSVDeaccess( psCSVTableList->pszFilename );
00138         
00139         return;
00140     }
00141 
00142 /* -------------------------------------------------------------------- */
00143 /*      Find this table.                                                */
00144 /* -------------------------------------------------------------------- */
00145     psLast = NULL;
00146     for( psTable = psCSVTableList;
00147          psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00148          psTable = psTable->psNext )
00149     {
00150         psLast = psTable;
00151     }
00152 
00153     if( psTable == NULL )
00154         return;
00155 
00156 /* -------------------------------------------------------------------- */
00157 /*      Remove the link from the list.                                  */
00158 /* -------------------------------------------------------------------- */
00159     if( psLast != NULL )
00160         psLast->psNext = psTable->psNext;
00161     else
00162         psCSVTableList = psTable->psNext;
00163 
00164 /* -------------------------------------------------------------------- */
00165 /*      Free the table.                                                 */
00166 /* -------------------------------------------------------------------- */
00167     VSIFClose( psTable->fp );
00168 
00169     CSLDestroy( psTable->papszFieldNames );
00170     CSLDestroy( psTable->papszRecFields );
00171     CPLFree( psTable->pszFilename );
00172 
00173     CPLFree( psTable );
00174 }
00175 
00176 /************************************************************************/
00177 /*                          CSVReadParseLine()                          */
00178 /*                                                                      */
00179 /*      Read one line, and return split into fields.  The return        */
00180 /*      result is a stringlist, in the sense of the CSL functions.      */
00181 /************************************************************************/
00182 
00183 char **CSVReadParseLine( FILE * fp )
00184 
00185 {
00186     const char  *pszLine;
00187     char        *pszWorkLine;
00188     char        **papszReturn;
00189 
00190     CPLAssert( fp != NULL );
00191     if( fp == NULL )
00192         return( NULL );
00193     
00194     pszLine = CPLReadLine( fp );
00195     if( pszLine == NULL )
00196         return( NULL );
00197 
00198 /* -------------------------------------------------------------------- */
00199 /*      If there are no quotes, then this is the simple case.           */
00200 /*      Parse, and return tokens.                                       */
00201 /* -------------------------------------------------------------------- */
00202     if( strchr(pszLine,'\"') == NULL )
00203         return CSLTokenizeStringComplex( pszLine, ",", TRUE, TRUE );
00204 
00205 /* -------------------------------------------------------------------- */
00206 /*      We must now count the quotes in our working string, and as      */
00207 /*      long as it is odd, keep adding new lines.                       */
00208 /* -------------------------------------------------------------------- */
00209     pszWorkLine = CPLStrdup( pszLine );
00210 
00211     while( TRUE )
00212     {
00213         int             i, nCount = 0;
00214 
00215         for( i = 0; pszWorkLine[i] != '\0'; i++ )
00216         {
00217             if( pszWorkLine[i] == '\"'
00218                 && (i == 0 || pszWorkLine[i-1] != '\\') )
00219                 nCount++;
00220         }
00221 
00222         if( nCount % 2 == 0 )
00223             break;
00224 
00225         pszLine = CPLReadLine( fp );
00226         if( pszLine == NULL )
00227             break;
00228 
00229         pszWorkLine = (char *)
00230             CPLRealloc(pszWorkLine,
00231                        strlen(pszWorkLine) + strlen(pszLine) + 1);
00232         strcat( pszWorkLine, pszLine );
00233     }
00234     
00235     papszReturn = CSLTokenizeStringComplex( pszWorkLine, ",", TRUE, TRUE );
00236 
00237     CPLFree( pszWorkLine );
00238 
00239     return papszReturn;
00240 }
00241 
00242 /************************************************************************/
00243 /*                             CSVCompare()                             */
00244 /*                                                                      */
00245 /*      Compare a field to a search value using a particular            */
00246 /*      criteria.                                                       */
00247 /************************************************************************/
00248 
00249 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00250                        CSVCompareCriteria eCriteria )
00251 
00252 {
00253     if( eCriteria == CC_ExactString )
00254     {
00255         return( strcmp( pszFieldValue, pszTarget ) == 0 );
00256     }
00257     else if( eCriteria == CC_ApproxString )
00258     {
00259         return( EQUAL( pszFieldValue, pszTarget ) );
00260     }
00261     else if( eCriteria == CC_Integer )
00262     {
00263         return( atoi(pszFieldValue) == atoi(pszTarget) );
00264     }
00265 
00266     return FALSE;
00267 }
00268 
00269 /************************************************************************/
00270 /*                            CSVScanLines()                            */
00271 /*                                                                      */
00272 /*      Read the file scanline for lines where the key field equals     */
00273 /*      the indicated value with the suggested comparison criteria.     */
00274 /*      Return the first matching line split into fields.               */
00275 /************************************************************************/
00276 
00277 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00278                      CSVCompareCriteria eCriteria )
00279 
00280 {
00281     char        **papszFields = NULL;
00282     int         bSelected = FALSE, nTestValue;
00283 
00284     CPLAssert( pszValue != NULL );
00285     CPLAssert( iKeyField >= 0 );
00286     CPLAssert( fp != NULL );
00287     
00288     nTestValue = atoi(pszValue);
00289     
00290     while( !bSelected ) {
00291         papszFields = CSVReadParseLine( fp );
00292         if( papszFields == NULL )
00293             return( NULL );
00294 
00295         if( CSLCount( papszFields ) < iKeyField+1 )
00296         {
00297             /* not selected */
00298         }
00299         else if( eCriteria == CC_Integer
00300                  && atoi(papszFields[iKeyField]) == nTestValue )
00301         {
00302             bSelected = TRUE;
00303         }
00304         else
00305         {
00306             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00307                                     eCriteria );
00308         }
00309 
00310         if( !bSelected )
00311         {
00312             CSLDestroy( papszFields );
00313             papszFields = NULL;
00314         }
00315     }
00316     
00317     return( papszFields );
00318 }
00319 
00320 /************************************************************************/
00321 /*                            CSVScanFile()                             */
00322 /*                                                                      */
00323 /*      Scan a whole file using criteria similar to above, but also     */
00324 /*      taking care of file opening and closing.                        */
00325 /************************************************************************/
00326 
00327 char **CSVScanFile( const char * pszFilename, int iKeyField,
00328                     const char * pszValue, CSVCompareCriteria eCriteria )
00329 
00330 {
00331     CSVTable    *psTable;
00332 
00333 /* -------------------------------------------------------------------- */
00334 /*      Get access to the table.                                        */
00335 /* -------------------------------------------------------------------- */
00336     CPLAssert( pszFilename != NULL );
00337 
00338     if( iKeyField < 0 )
00339         return NULL;
00340 
00341     psTable = CSVAccess( pszFilename );
00342     if( psTable == NULL )
00343         return NULL;
00344 
00345 /* -------------------------------------------------------------------- */
00346 /*      Does the current record match the criteria?  If so, just        */
00347 /*      return it again.                                                */
00348 /* -------------------------------------------------------------------- */
00349     if( iKeyField >= 0
00350         && iKeyField < CSLCount(psTable->papszRecFields)
00351         && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00352     {
00353         return psTable->papszRecFields;
00354     }
00355 
00356 /* -------------------------------------------------------------------- */
00357 /*      Scan the file from the beginning, replacing the ``current       */
00358 /*      record'' in our structure with the one that is found.           */
00359 /* -------------------------------------------------------------------- */
00360     VSIRewind( psTable->fp );
00361     CPLReadLine( psTable->fp );         /* throw away the header line */
00362     
00363     CSLDestroy( psTable->papszRecFields );
00364     psTable->papszRecFields =
00365         CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00366 
00367     return( psTable->papszRecFields );
00368 }
00369 
/************************************************************************/
/*                           CSVGetFieldId()                            */
/*                                                                      */
/*      Rewind the file, read its first record, and return the zero     */
/*      based index of the field with the given name, or -1 if no       */
/*      field matches.  Names are compared case insensitively with      */
/*      EQUAL().  On return the file is positioned just after the       */
/*      first record.                                                   */
/************************************************************************/

int CSVGetFieldId( FILE * fp, const char * pszFieldName )

{
    char        **papszHeader;
    int         iField;
    int         iFound = -1;

    CPLAssert( fp != NULL && pszFieldName != NULL );

    VSIRewind( fp );

    papszHeader = CSVReadParseLine( fp );
    if( papszHeader != NULL )
    {
        for( iField = 0; papszHeader[iField] != NULL; iField++ )
        {
            if( EQUAL(papszHeader[iField],pszFieldName) )
            {
                iFound = iField;
                break;
            }
        }
    }

    /* The header list is released whether or not a match was found. */
    CSLDestroy( papszHeader );

    return iFound;
}
00405 
00406 /************************************************************************/
00407 /*                         CSVGetFileFieldId()                          */
00408 /*                                                                      */
00409 /*      Same as CPLGetFieldId(), except that we get the file based      */
00410 /*      on filename, rather than having an existing handle.             */
00411 /************************************************************************/
00412 
00413 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00414 
00415 {
00416     CSVTable    *psTable;
00417     int         i;
00418     
00419 /* -------------------------------------------------------------------- */
00420 /*      Get access to the table.                                        */
00421 /* -------------------------------------------------------------------- */
00422     CPLAssert( pszFilename != NULL );
00423 
00424     psTable = CSVAccess( pszFilename );
00425     if( psTable == NULL )
00426         return -1;
00427 
00428 /* -------------------------------------------------------------------- */
00429 /*      Find the requested field.                                       */
00430 /* -------------------------------------------------------------------- */
00431     for( i = 0;
00432          psTable->papszFieldNames != NULL
00433              && psTable->papszFieldNames[i] != NULL;
00434          i++ )
00435     {
00436         if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00437         {
00438             return i;
00439         }
00440     }
00441 
00442     return -1;
00443 }
00444 
00445 
00446 /************************************************************************/
00447 /*                         CSVScanFileByName()                          */
00448 /*                                                                      */
00449 /*      Same as CSVScanFile(), but using a field name instead of a      */
00450 /*      field number.                                                   */
00451 /************************************************************************/
00452 
00453 char **CSVScanFileByName( const char * pszFilename,
00454                           const char * pszKeyFieldName,
00455                           const char * pszValue, CSVCompareCriteria eCriteria )
00456 
00457 {
00458     int         iKeyField;
00459 
00460     iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00461     if( iKeyField == -1 )
00462         return NULL;
00463 
00464     return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00465 }
00466 
00467 /************************************************************************/
00468 /*                            CSVGetField()                             */
00469 /*                                                                      */
00470 /*      The all-in-one function to fetch a particular field value       */
00471 /*      from a CSV file.  Note this function will return an empty       */
00472 /*      string, rather than NULL if it fails to find the desired        */
00473 /*      value for some reason.  The caller can't establish that the     */
00474 /*      fetch failed.                                                   */
00475 /************************************************************************/
00476 
00477 const char *CSVGetField( const char * pszFilename,
00478                          const char * pszKeyFieldName,
00479                          const char * pszKeyFieldValue,
00480                          CSVCompareCriteria eCriteria,
00481                          const char * pszTargetField )
00482 
00483 {
00484     CSVTable    *psTable;
00485     char        **papszRecord;
00486     int         iTargetField;
00487     
00488 /* -------------------------------------------------------------------- */
00489 /*      Find the table.                                                 */
00490 /* -------------------------------------------------------------------- */
00491     psTable = CSVAccess( pszFilename );
00492     if( psTable == NULL )
00493         return "";
00494 
00495 /* -------------------------------------------------------------------- */
00496 /*      Find the correct record.                                        */
00497 /* -------------------------------------------------------------------- */
00498     papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00499                                      pszKeyFieldValue, eCriteria );
00500 
00501     if( papszRecord == NULL )
00502         return "";
00503 
00504 /* -------------------------------------------------------------------- */
00505 /*      Figure out which field we want out of this.                     */
00506 /* -------------------------------------------------------------------- */
00507     iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00508     if( iTargetField < 0 )
00509         return "";
00510 
00511     if( iTargetField >= CSLCount( papszRecord ) )
00512         return "";
00513 
00514     return( papszRecord[iTargetField] );
00515 }
00516 
/************************************************************************/
/*                            CSVFilename()                             */
/*                                                                      */
/*      Return the full path to a particular CSV file.  A hook set      */
/*      with SetCSVFilenameHook() takes precedence; otherwise the       */
/*      result of CPLFindFile(), $GEOTIFF_CSV, a local csv directory    */
/*      and /usr/local/share/epsg_csv are tried in that order.  The     */
/*      path may live in a static buffer that the next call reuses.     */
/************************************************************************/

static const char *(*pfnCSVFilenameHook)(const char *) = NULL;

const char * CSVFilename( const char *pszBasename )

{
    static char         szPath[512];
    const char          *pszResult;
    const char          *pszEnvDir;
    FILE                *fpProbe;

    /* An application supplied hook overrides the built-in search. */
    if( pfnCSVFilenameHook != NULL )
        return( pfnCSVFilenameHook( pszBasename ) );

    pszResult = CPLFindFile( "epsg_csv", pszBasename );
    if( pszResult != NULL )
        return pszResult;

    /* All formatting below is bounded by the buffer size: an overlong */
    /* directory or base name is truncated instead of overrunning      */
    /* szPath, and the result is always NUL terminated.                */
    pszEnvDir = getenv("GEOTIFF_CSV");
    if( pszEnvDir != NULL )
    {
        snprintf( szPath, sizeof(szPath), "%s/%s", pszEnvDir, pszBasename );
    }
    else if( (fpProbe = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
    {
        /* The probe only detects a local csv directory; close it now. */
        fclose( fpProbe );
        snprintf( szPath, sizeof(szPath), "csv/%s", pszBasename );
    }
    else
    {
        snprintf( szPath, sizeof(szPath),
                  "/usr/local/share/epsg_csv/%s", pszBasename );
    }

    return( szPath );
}
00560 
00561 /************************************************************************/
00562 /*                         SetCSVFilenameHook()                         */
00563 /*                                                                      */
00564 /*      Applications can use this to set a function that will           */
00565 /*      massage CSV filenames.                                          */
00566 /************************************************************************/
00567 
00612 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00613 
00614 {
00615     pfnCSVFilenameHook = pfnNewHook;
00616 }

doxygen1.2.2 Dimitri van Heesch, © 1997-2000