star-hitran

Load line-by-line data from the HITRAN database
git clone git://git.meso-star.fr/star-hitran.git
Log | Files | Refs | README | LICENSE

commit b0567f25e66c2a10117e40b4efa7667f3b1f6658
parent 818b197f8e47232d2fd283b6ceafc90dc2c8c14f
Author: Vincent Forest <vincent.forest@meso-star.com>
Date:   Fri,  4 Feb 2022 18:07:38 +0100

Begin loading isotopologues metadata

Diffstat:
Mcmake/CMakeLists.txt | 3++-
Msrc/shitran.c | 1+
Msrc/shitran.h | 29++++++++++++++++++++++++++---
Asrc/shitran_isotopologues.c | 425+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 454 insertions(+), 4 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt @@ -44,7 +44,8 @@ set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH}) set(SHITRAN_FILES_SRC shitran.c - shitran_log.c) + shitran_log.c + shitran_isotopologues.c) set(SHITRAN_FILES_INC shitran_c.h shitran_log.h) diff --git a/src/shitran.c b/src/shitran.c @@ -32,6 +32,7 @@ static void release_shitran(ref_T* ref) { struct shitran* shitran = CONTAINER_OF(ref, struct shitran, ref); + ASSERT(ref); if(shitran->logger == &shitran->logger__) logger_release(&shitran->logger__); MEM_RM(shitran->allocator, shitran); } diff --git a/src/shitran.h b/src/shitran.h @@ -49,12 +49,12 @@ static const struct shitran_create_args SHITRAN_CREATE_ARGS_DEFAULT = /* Forware declarations */ struct shitran; -struct shitran_isotopologue_metadata; +struct shitran_isotopologues; BEGIN_DECLS /******************************************************************************* - * SHITRAN API + * Device API ******************************************************************************/ SHITRAN_API res_T shitran_create @@ -69,7 +69,30 @@ SHITRAN_API res_T shitran_ref_put (struct shitran* shitran); +/******************************************************************************* + * Isotopologues API + ******************************************************************************/ +SHITRAN_API res_T +shitran_isotopologues_load + (struct shitran* shitran, + const char* path, + struct shitran_isotopologues** isotopologues); + +SHITRAN_API res_T +shitran_isotopologues_load_from_stream + (struct shitran* shitran, + FILE* stream, + const char* stream_name, /* NULL <=> use default stream name */ + struct shitran_isotopologues** isotopologues); + +SHITRAN_API res_T +shitran_isotopologues_ref_get + (struct shitran_isotopologues* isotopologues); + +SHITRAN_API res_T +shitran_isotopologues_ref_put + (struct shitran_isotopologues* isotopologues); + END_DECLS #endif /* SHITRAN_H */ - diff --git a/src/shitran_isotopologues.c b/src/shitran_isotopologues.c @@ -0,0 +1,425 @@ +/* Copyright (C) 2022 CNRS - LMD + * Copyright (C) 2022 |Meso|Star> (contact@meso-star.com) + * Copyright (C) 2022 Université Paul Sabatier - IRIT/Laplace + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#define _POSIX_C_SOURCE 200112L /* strtok_r support */ + +#include "shitran.h" +#include "shitran_c.h" +#include "shitran_log.h" + +#include <rsys/cstr.h> +#include <rsys/dynamic_array.h> +#include <rsys/hash_table.h> +#include <rsys/ref_count.h> +#include <rsys/str.h> +#include <rsys/text_reader.h> + +#include <ctype.h> +#include <string.h> + +struct isotope { + double abundance; + double Q; /* At 296 K */ + double gj; + double molar_mass; /* In g */ + size_t molecule; /* Index of the molecule to which the isotope belongs */ + int id; /* Unique identifier of the isotope */ +}; + +/* Generate the dynamic array of isotopes */ +#define DARRAY_NAME isotope +#define DARRAY_DATA struct isotope +#include <rsys/dynamic_array.h> + +struct molecule { + struct str name; + size_t isotopes_range[2]; /* Range of the 1st and last regisered isotopes */ + int id; /* Unique identifier of the molecule */ +}; +#define MOLECULE_IS_VALID(Molecule) ((Molecule)->id >= 0) + +static INLINE void +molecule_clear(struct molecule* molecule) +{ + ASSERT(molecule); + molecule->isotopes_range[0] = SIZE_MAX; + molecule->isotopes_range[1] = 0; + molecule->id = -1; +} + +static INLINE void +molecule_init(struct mem_allocator* allocator, struct molecule* molecule) +{ + str_init(allocator, &molecule->name); + molecule_clear(molecule); +} + +static INLINE void +molecule_release(struct molecule* molecule) +{ + str_release(&molecule->name); +} + +static INLINE res_T +molecule_copy(struct molecule* dst, const struct molecule* src) +{ + dst->isotopes_range[0] = src->isotopes_range[0]; + dst->isotopes_range[1] = src->isotopes_range[1]; + dst->id = src->id; + return str_copy(&dst->name, &src->name); +} + +static INLINE res_T +molecule_copy_and_release(struct molecule* dst, struct molecule* src) +{ + dst->isotopes_range[0] = src->isotopes_range[0]; + dst->isotopes_range[1] = src->isotopes_range[1]; + dst->id = src->id; + return str_copy_and_release(&dst->name, &src->name); +} + +/* Generate the dynamic array of molecules */ +#define DARRAY_NAME molecule +#define DARRAY_DATA struct molecule +#define DARRAY_FUNCTOR_INIT molecule_init +#define DARRAY_FUNCTOR_RELEASE molecule_release +#define DARRAY_FUNCTOR_COPY molecule_copy +#define DARRAY_FUNCTOR_COPY_AND_RELEASE molecule_copy_and_release +#include <rsys/dynamic_array.h> + +/* Generate the hash table that map a unique identifier to its index */ +#define HTABLE_NAME id2entry +#define HTABLE_KEY int /* Unique identifier */ +#define HTABLE_DATA size_t /* Index of the corresponding registered data */ +#include <rsys/hash_table.h> + +struct shitran_isotopologues { + + /* List of molecules and isotopes */ + struct darray_molecule molecules; + struct darray_isotope isotopes; + + /* Map the identifier of a molecule/isotope to its correspond index into + * their corresponding dynamic arays into which they are registered */ + struct htable_id2entry molid2idx; + struct htable_id2entry isoid2idx; + + struct shitran* shitran; + ref_T ref; +}; + +/******************************************************************************* + * Helper functions + ******************************************************************************/ +static res_T +create_isotoplogues + (struct shitran* shitran, + struct shitran_isotopologues** out_isotopologues) +{ + struct shitran_isotopologues* isotopologues = NULL; + res_T res = RES_OK; + ASSERT(shitran && out_isotopologues); + + isotopologues = MEM_CALLOC(shitran->allocator, 1, sizeof(*isotopologues)); + if(!isotopologues) { + log_err(shitran, "Could not allocate the isotopologues data structure.\n"); + res = RES_MEM_ERR; + goto error; + } + ref_init(&isotopologues->ref); + SHITRAN(ref_get(shitran)); + isotopologues->shitran = shitran; + darray_molecule_init(shitran->allocator, &isotopologues->molecules); + darray_isotope_init(shitran->allocator, &isotopologues->isotopes); + htable_id2entry_init(shitran->allocator, &isotopologues->molid2idx); + htable_id2entry_init(shitran->allocator, &isotopologues->isoid2idx); + +exit: + *out_isotopologues = isotopologues; + return res; +error: + goto exit; +} + +static res_T +flush_molecule + (struct shitran_isotopologues* isotopologues, + struct molecule* molecule, /* Currently parsed molecule */ + struct txtrdr* txtrdr) +{ + size_t entry = 0; + res_T res = RES_OK; + ASSERT(isotopologues && molecule && MOLECULE_IS_VALID(molecule)); + + /* Fetch _exclusive_ upper bound */ + molecule->isotopes_range[1] = darray_isotope_size_get(&isotopologues->isotopes); + if(molecule->isotopes_range[0] >= molecule->isotopes_range[1]) { + log_warn(isotopologues->shitran, + "%s: the %s molecule does not have any isotopes.\n", + txtrdr_get_name(txtrdr), str_cget(&molecule->name)); + } + + /* Fetch the index of the registered molecule */ + entry = darray_molecule_size_get(&isotopologues->molecules); + + /* Storing the molecule */ + res = darray_molecule_push_back(&isotopologues->molecules, molecule); + if(res != RES_OK) { + log_err(isotopologues->shitran, + "%s: error storing the %s molecule -- %s.\n", + txtrdr_get_name(txtrdr), str_cget(&molecule->name), res_to_cstr(res)); + goto error; + } + + /* Registering the molecule */ + ASSERT(!htable_id2entry_find(&isotopologues->molid2idx, &molecule->id)); + res = htable_id2entry_set(&isotopologues->molid2idx, &molecule->id, &entry); + if(res != RES_OK) { + log_err(isotopologues->shitran, + "%s: error registering the %s molecule -- %s.\n", + txtrdr_get_name(txtrdr), str_cget(&molecule->name), res_to_cstr(res)); + goto error; + } + + molecule_clear(molecule); + +exit: + return res; +error: + goto exit; +} + +static res_T +parse_molecule + (struct shitran_isotopologues* isotopologues, + struct molecule* molecule, + struct txtrdr* txtrdr) +{ + char* name = NULL; + char* id = NULL; + char* tk = NULL; + char* tk_ctx = NULL; + size_t len; + res_T res = RES_OK; + ASSERT(molecule && txtrdr); + + name = strtok_r(txtrdr_get_line(txtrdr), " \t", &tk_ctx); + id = strtok_r(NULL, " \t", &tk_ctx); + + if(!name) { + log_err(isotopologues->shitran, "%s:%lu: molecule name is missing.\n", + txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr)); + res = RES_BAD_ARG; + goto error; + } + + len = strlen(id); + if(!id || !len || id[0] != '(' || id[len-1] != ')') { + log_err(isotopologues->shitran, "%s:%lu: invalid molecule identifier.\n", + txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr)); + res = RES_BAD_ARG; + goto error; + } + + id[len-1] = '\0'; /* Rm trailing parenthesis */ + res = cstr_to_int(id+1/*Rm leading parenthesis*/, &molecule->id); + if(res != RES_OK || !MOLECULE_IS_VALID(molecule)) { + id[len-1] = ')'; /* Re-add the trailing parenthesis */ + log_err(isotopologues->shitran, "%s:%lu: invalid molecule identifier `%s'.\n", + txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr), id); + res = RES_BAD_ARG; + goto error; + } + + tk = strtok_r(NULL, " \t", &tk_ctx); + if(tk) { + log_warn(isotopologues->shitran, "%s:%lu: unexpected text `%s'.\n", + txtrdr_get_name(txtrdr), (unsigned long)txtrdr_get_line_num(txtrdr), tk); + } + +exit: + return res; +error: + goto exit; +} + +static res_T +parse_line + (struct shitran_isotopologues* isotopologues, + struct molecule* molecule, /* Currently parsed molecule */ + struct txtrdr* txtrdr) +{ + const char* line = NULL; + size_t i; + res_T res = RES_OK; + ASSERT(isotopologues && molecule && txtrdr); + + line = txtrdr_get_cline(txtrdr); + ASSERT(line); + i = strcspn(line, " \t"); + ASSERT(i < strlen(line)); + + if(isalpha(line[i])) { + if(MOLECULE_IS_VALID(molecule)) { + res = flush_molecule(isotopologues, molecule, txtrdr); + if(res != RES_OK) goto error; + } + res = parse_molecule(isotopologues, molecule, txtrdr); + if(res != RES_OK) goto error; + } else { + /* TODO parse the isotope */ + } + +exit: + return res; +error: + goto exit; +} + +static res_T +load_stream + (struct shitran* shitran, + FILE* stream, + const char* name, + struct shitran_isotopologues** out_isotopologues) +{ + struct molecule molecule; /* Current molecule */ + struct shitran_isotopologues* isotopologues = NULL; + struct txtrdr* txtrdr = NULL; + res_T res = RES_OK; + ASSERT(shitran && stream && name && out_isotopologues); + + molecule_init(shitran->allocator, &molecule); + + res = create_isotoplogues(shitran, &isotopologues); + if(res != RES_OK) goto error; + + res = txtrdr_stream(isotopologues->shitran->allocator, stream, name, + 0/*comment char*/, &txtrdr); + if(res != RES_OK) { + log_err(shitran, "%s: error creating the text reader -- %s.\n", + name, res_to_cstr(res)); + goto error; + } + + #define READ_LINE { \ + res = txtrdr_read_line(txtrdr); \ + if(res != RES_OK) { \ + log_err(shitran, "%s: error reading the line `%lu' -- %s.\n", \ + name, (unsigned long)txtrdr_get_line_num(txtrdr), res_to_cstr(res)); \ + goto error; \ + } \ + } (void)0 + + /* Skip the 1st line that is a comment line*/ + READ_LINE; + if(!txtrdr_get_cline(txtrdr)) goto exit; + + for(;;) { + READ_LINE; + + if(!txtrdr_get_cline(txtrdr)) break; /* No more parsed line */ + res = parse_line(isotopologues, &molecule, txtrdr); + if(res != RES_OK) goto error; + } + #undef READ_LINE + +exit: + *out_isotopologues = isotopologues; + molecule_release(&molecule); + return res; +error: + goto exit; +} + +static void +release_isotopologues(ref_T* ref) +{ + struct shitran* shitran = NULL; + struct shitran_isotopologues* isotopologues = CONTAINER_OF + (ref, struct shitran_isotopologues, ref); + ASSERT(ref); + shitran = isotopologues->shitran; + darray_molecule_release(&isotopologues->molecules); + darray_isotope_release(&isotopologues->isotopes); + htable_id2entry_release(&isotopologues->molid2idx); + htable_id2entry_release(&isotopologues->isoid2idx); + MEM_RM(shitran->allocator, isotopologues); + SHITRAN(ref_put(shitran)); +} + +/******************************************************************************* + * Exported functions + ******************************************************************************/ +res_T +shitran_isotopologues_load + (struct shitran* shitran, + const char* path, + struct shitran_isotopologues** isotopologues) +{ + FILE* file = NULL; + res_T res = RES_OK; + + if(!shitran || !path) { + res = RES_BAD_ARG; + goto error; + } + + file = fopen(path, "r"); + if(!file) { + log_err(shitran, "%s: error opening file `%s'.\n", FUNC_NAME, path); + res = RES_IO_ERR; + goto error; + } + + res = load_stream(shitran, file, path, isotopologues); + if(res != RES_OK) goto error; + +exit: + if(file) fclose(file); + return res; +error: + goto exit; +} + +res_T +shitran_isotopologues_load_from_stream + (struct shitran* shitran, + FILE* stream, + const char* stream_name, + struct shitran_isotopologues** isotopologues) +{ + if(!shitran || !stream) return RES_BAD_ARG; + return load_stream + (shitran, stream, stream_name ? stream_name : "<stream>", isotopologues); +} + +res_T +shitran_isotopologues_ref_get(struct shitran_isotopologues* isotopologues) +{ + if(!isotopologues) return RES_BAD_ARG; + ref_get(&isotopologues->ref); + return RES_OK; +} + +res_T +shitran_isotopologues_ref_put(struct shitran_isotopologues* isotopologues) +{ + if(!isotopologues) return RES_BAD_ARG; + ref_put(&isotopologues->ref, release_isotopologues); + return RES_OK; +}