shtr_line_list_c.h (6688B)
1 /* Copyright (C) 2022, 2025, 2026 |Méso|Star> (contact@meso-star.com) 2 * Copyright (C) 2025, 2026 Université de Lorraine 3 * Copyright (C) 2022 Centre National de la Recherche Scientifique 4 * Copyright (C) 2022 Université Paul Sabatier 5 * 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 #ifndef SHTR_LINE_LIST_C_H 20 #define SHTR_LINE_LIST_C_H 21 22 #include "shtr.h" 23 24 #include <rsys/dynamic_array.h> 25 #include <rsys/ref_count.h> 26 27 #include <zlib.h> 28 29 /* 30 * Brief summary of the design 31 * 32 * Since the number of lines can be very large, one of the challenges is to 33 * reduce the memory footprint. Several line parameters are therefore encoded 34 * with reduced precision (see “struct line”). 35 * 36 * In addition, lines are first stored in a chunk of CHUNK_SIZE bytes which, 37 * once filled, is compressed using zlib. The compressed data is then stored in 38 * a fixed-size memory block. Each chunk can be accessed and decompressed 39 * separately. The memory descriptors for the compressed chunks, i.e., the data 40 * defining the location where the compressed data is stored in the memory 41 * blocks (see “struct zchunk”), are stored in a dynamic array in the order in 42 * which the lines are loaded. Similarly, the memory blocks that contain the 43 * compressed data are also listed in a dynamic array in the order in which the 44 * lines are read. Therefore, the index of a line is sufficient to index the 45 * chunk in which the line is stored, and finally the memory block in which its 46 * parameters are compressed. 47 * 48 * The use of dynamic arrays allows this access by simple indexing, but at the 49 * cost of memory overhead due to the dynamic array allocation policy (up to 50 * twice the required size in the worst case). However, the memory space in 51 * question here is not a major problem, since the zchunk structure and the 52 * pointer to a memory block take up very little space. And while linked lists 53 * could have been used instead, they would not only have complicated data 54 * access, with pointers replacing indexing, but they would also have 55 * complicated data [de]serialization, precisely because of the use of pointers 56 * instead of indexes. 57 * 58 * Note that the use of memory blocks stored in dynamic arrays rather than a 59 * simple dynamic array of contiguous bytes is motivated by the issue of 60 * additional memory overhead associated with the use of dynamic arrays. As said 61 * above, on the worst case, the memory overhead here is equal to twice the 62 * number of blocks multiplied by the size of a pointer, compared to twice the 63 * size required to store all the lines. 64 * 65 * A cache is ultimately used to speed up access to lines, which must now be 66 * decompressed. This cache stores the decompressed blocks in which the most 67 * recently accessed lines are stored. The implementation of this cache is 68 * independent of the line storage. However, it must be thread-safe to allow 69 * simultaneous access. 70 */ 71 72 /* Size in bytes of a memory block in which compressed data is stored */ 73 #define BLOCK_SIZE (1024*1024) 74 75 /* Size in bytes of an uncompressed chunk */ 76 #define CHUNK_SIZE (64*1024) 77 78 /* Number of lines in a chunk */ 79 #define NLINES_PER_CHUNK (CHUNK_SIZE/sizeof(struct line)) 80 81 /* Memory descriptor of a compressed chunk */ 82 struct zchunk { 83 /* Offset to chunk data. The offset is indicated as if the compressed data 84 * were stored sequentially. However, the data is stored in memory blocks of 85 * fixed size. The offset therefore defines both the block index 86 * (offset/BLOCK_SIZE) and the offset within the block (offset%BLOCK_SIZE) */ 87 size_t offset; 88 89 /* Size in bytes of the compressed chunk */ 90 uint32_t size; 91 }; 92 #define ZCHUNK_NULL__ {0} 93 static const struct zchunk ZCHUNK_NULL = ZCHUNK_NULL__; 94 95 struct line { 96 double wavenumber; /* Central wavenumber in vacuum [cm^-1] */ 97 double intensity; /* Reference intensity [cm^-1/(molec.cm^2)] */ 98 float lower_state_energy; /* [cm^-1] */ 99 float delta_air; /* Air-pressure wavenumber shift [cm^-1.atm^-1] */ 100 101 /* Packed data on 4 bytes: 102 * - gamma_air in fixed precision (integer: 0; fractional: 14) 103 * - gamma_self in fixed precision (integer: 0; fractional: 14) 104 * - isotope_id_local on 4 bits. 105 * 106 * Note that the The value of the isotopic index is _not_ the value of the 107 * isotopic index read from the HITRAN file. The original value is in [0, 9] 108 * with 0 actually meaning 10. Thus, once decoded, the index is located in [1, 109 * 10]. The next member variable simply stores this index but decremented by 110 * one in order to make it compatible with C indexing. As a result, it can be 111 * used directly to index the 'isotopes' array of a 'shtr_molecule' data 112 * structure loaded from an isotope metadata file */ 113 int32_t gair14_gself14_isoid4; 114 115 /* Temperature-dependent exponent. This is actually a floating-point number 116 * with the last 7 bits of the mantissa disabled. They store the molecule 117 * identifier. */ 118 int32_t nair25_molid7; 119 }; 120 #define LINE_NULL__ {0} 121 static const struct line LINE_NULL = LINE_NULL__; 122 123 STATIC_ASSERT(sizeof(struct line)==32, Unexpected_sizeof_struct_line); 124 125 /* Generate the dynamic array of zchunk */ 126 #define DARRAY_NAME zchunk 127 #define DARRAY_DATA struct zchunk 128 #include <rsys/dynamic_array.h> 129 130 /* Generate he dynamic array of char* */ 131 #define DARRAY_NAME charp 132 #define DARRAY_DATA char* 133 #include <rsys/dynamic_array.h> 134 135 /* Version of the line list. One should increment it and perform a version 136 * management onto serialized data when the line list structure is updated. */ 137 static const int SHTR_LINE_LIST_VERSION = 2; 138 139 /* Forward declaration */ 140 struct cache; 141 142 struct shtr_line_list { 143 /* Compressed lines sorted in ascending order wrt their wavenumber */ 144 struct darray_zchunk zchunks; /* Accessor to compressed lines */ 145 struct darray_charp blocks; /* Memory where compressed lines are stored */ 146 size_t nlines; /* Number of lines */ 147 148 /* Informations on line parameters */ 149 struct shtr_line_list_info info; 150 151 struct cache* cache; 152 153 /* zlib */ 154 z_stream z_stream; 155 int zlib_is_init; 156 157 struct shtr* shtr; 158 ref_T ref; 159 }; 160 161 #endif /* SHTR_LINE_LIST_C_H */