star-hitran

Load line-by-line data from the HITRAN database
git clone git://git.meso-star.fr/star-hitran.git
Log | Files | Refs | README | LICENSE

shtr_line_list_c.h (6688B)


      1 /* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com)
      2  * Copyright (C) 2025, 2026 Université de Lorraine
      3  * Copyright (C) 2022 Centre National de la Recherche Scientifique
      4  * Copyright (C) 2022 Université Paul Sabatier
      5  *
      6  * This program is free software: you can redistribute it and/or modify
      7  * it under the terms of the GNU General Public License as published by
      8  * the Free Software Foundation, either version 3 of the License, or
      9  * (at your option) any later version.
     10  *
     11  * This program is distributed in the hope that it will be useful,
     12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
     14  * GNU General Public License for more details.
     15  *
     16  * You should have received a copy of the GNU General Public License
     17  * along with this program. If not, see <http://www.gnu.org/licenses/>. */
     18 
     19 #ifndef SHTR_LINE_LIST_C_H
     20 #define SHTR_LINE_LIST_C_H
     21 
     22 #include "shtr.h"
     23 
     24 #include <rsys/dynamic_array.h>
     25 #include <rsys/ref_count.h>
     26 
     27 #include <zlib.h>
     28 
     29 /*
     30  * Brief summary of the design
     31  *
     32  * Since the number of lines can be very large, one of the challenges is to
     33  * reduce the memory footprint. Several line parameters are therefore encoded
     34  * with reduced precision (see “struct line”).
     35  *
     36  * In addition, lines are first stored in a chunk of CHUNK_SIZE bytes which,
     37  * once filled, is compressed using zlib. The compressed data is then stored in
     38  * a fixed-size memory block. Each chunk can be accessed and decompressed
     39  * separately. The memory descriptors for the compressed chunks, i.e., the data
     40  * defining the location where the compressed data is stored in the memory
     41  * blocks (see “struct zchunk”), are stored in a dynamic array in the order in
     42  * which the lines are loaded. Similarly, the memory blocks that contain the
     43  * compressed data are also listed in a dynamic array in the order in which the
     44  * lines are read. Therefore, the index of a line is sufficient to index the
     45  * chunk in which the line is stored, and finally the memory block in which its
     46  * parameters are compressed.
     47  *
     48  * The use of dynamic arrays allows this access by simple indexing, but at the
     49  * cost of memory overhead due to the dynamic array allocation policy (up to
     50  * twice the required size in the worst case). However, the memory space in
     51  * question here is not a major problem, since the zchunk structure and the
     52  * pointer to a memory block take up very little space. And while linked lists
     53  * could have been used instead, they would not only have complicated data
     54  * access, with pointers replacing indexing, but they would also have
     55  * complicated data [de]serialization, precisely because of the use of pointers
     56  * instead of indexes.
     57  *
     58  * Note that the use of memory blocks stored in dynamic arrays rather than a
     59  * simple dynamic array of contiguous bytes is motivated by the issue of
     60  * additional memory overhead associated with the use of dynamic arrays. As said
     61  * above, in the worst case, the memory overhead here is equal to twice the
     62  * number of blocks multiplied by the size of a pointer, compared to twice the
     63  * size required to store all the lines.
     64  *
     65  * A cache is ultimately used to speed up access to lines, which must now be
     66  * decompressed. This cache stores the decompressed blocks in which the most
     67  * recently accessed lines are stored. The implementation of this cache is
     68  * independent of the line storage. However, it must be thread-safe to allow
     69  * simultaneous access.
     70  */
     71 
     72 /* Size in bytes (1 MiB) of a memory block storing compressed data */
     73 #define BLOCK_SIZE (1024*1024)
     74 
     75 /* Size in bytes (64 KiB) of an uncompressed chunk */
     76 #define CHUNK_SIZE (64*1024)
     77 
     78 /* Number of lines in a chunk, i.e. 2048 given the 32-byte struct line */
     79 #define NLINES_PER_CHUNK (CHUNK_SIZE/sizeof(struct line))
     80 
     81 /* Memory descriptor of a compressed chunk: where its bytes are and how many */
     82 struct zchunk {
     83   /* Offset to chunk data. The offset is indicated as if the compressed data
     84    * were stored sequentially. However, the data is stored in memory blocks of
     85    * fixed size. The offset therefore defines both the block index
     86    * (offset/BLOCK_SIZE) and the offset within the block (offset%BLOCK_SIZE) */
     87   size_t offset;
     88 
     89   /* Size in bytes of the compressed chunk */
     90   uint32_t size;
     91 };
     92 #define ZCHUNK_NULL__ {0}
     93 static const struct zchunk ZCHUNK_NULL = ZCHUNK_NULL__; /* offset=0, size=0 */
     94 
     95 struct line { /* Parameters of one line, stored with reduced precision */
     96   double wavenumber; /* Central wavenumber in vacuum [cm^-1] */
     97   double intensity; /* Reference intensity [cm^-1/(molec.cm^2)] */
     98   float lower_state_energy; /* [cm^-1] */
     99   float delta_air; /* Air-pressure wavenumber shift [cm^-1.atm^-1] */
    100 
    101   /* Packed data on 4 bytes (14 + 14 + 4 = 32 bits):
    102    * - gamma_air in fixed precision (integer: 0; fractional: 14)
    103    * - gamma_self in fixed precision (integer: 0; fractional: 14)
    104    * - isotope_id_local on 4 bits.
    105    *
    106    * Note that the value of the isotopic index is _not_ the value of the
    107    * isotopic index read from the HITRAN file. The original value is in [0, 9]
    108    * with 0 actually meaning 10. Thus, once decoded, the index is located in [1,
    109    * 10]. The next member variable simply stores this index but decremented by
    110    * one in order to make it compatible with C indexing. As a result, it can be
    111    * used directly to index the 'isotopes' array of a 'shtr_molecule' data
    112    * structure loaded from an isotope metadata file */
    113   int32_t gair14_gself14_isoid4;
    114 
    115   /* Temperature-dependent exponent. This is actually a floating-point number
    116    * with the last 7 bits of the mantissa disabled. Those 7 bits store the
    117    * molecule identifier instead (25 + 7 = 32 bits). */
    118   int32_t nair25_molid7;
    119 };
    120 #define LINE_NULL__ {0}
    121 static const struct line LINE_NULL = LINE_NULL__; /* All members zeroed */
    122 
    123 STATIC_ASSERT(sizeof(struct line)==32, Unexpected_sizeof_struct_line); /* 2 doubles + 2 floats + 2 int32_t are expected to occupy 32 bytes */
    124 
    125 /* Generate the dynamic array of zchunk, i.e. struct darray_zchunk */
    126 #define DARRAY_NAME zchunk
    127 #define DARRAY_DATA struct zchunk
    128 #include <rsys/dynamic_array.h>
    129 
    130 /* Generate the dynamic array of char*, i.e. struct darray_charp */
    131 #define DARRAY_NAME charp
    132 #define DARRAY_DATA char*
    133 #include <rsys/dynamic_array.h>
    134 
    135 /* Version of the line list. Increment it, and handle the versioning of the
    136  * serialized data, whenever the line list structure is updated. */
    137 static const int SHTR_LINE_LIST_VERSION = 2;
    138 
    139 /* Forward declaration: the cache is only referenced through a pointer here */
    140 struct cache;
    141 
    142 struct shtr_line_list {
    143   /* Compressed lines sorted in ascending order wrt their wavenumber */
    144   struct darray_zchunk zchunks; /* Descriptors used to access compressed lines */
    145   struct darray_charp blocks; /* Memory where compressed lines are stored */
    146   size_t nlines; /* Number of lines */
    147 
    148   /* Information on line parameters */
    149   struct shtr_line_list_info info;
    150 
    151   struct cache* cache; /* Cache of decompressed data for recently accessed lines */
    152 
    153   /* zlib state */
    154   z_stream z_stream;
    155   int zlib_is_init; /* NOTE(review): presumably non-zero once z_stream is initialized; confirm */
    156 
    157   struct shtr* shtr; /* NOTE(review): presumably the shtr instance this list belongs to; confirm */
    158   ref_T ref; /* Reference counter (see rsys/ref_count.h) */
    159 };
    160 
    161 #endif /* SHTR_LINE_LIST_C_H */