diff --git a/lookup_formulas.c b/lookup_formulas.c new file mode 100644 index 0000000000000000000000000000000000000000..29b69e4c4c9135d99b58123ac826beff86441109 --- /dev/null +++ b/lookup_formulas.c @@ -0,0 +1,787 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define limC 3000 +#define limH 30000 +#define limO 400 +#define limN 200 +#define limS 120 +#define limP 100 +#define limCl 10 +// Check if product of all these numbers fit in uint64 + + +#define MAX_MASS 2000 +#define MAX_RESULTS 500000 +#define MAX_ELE 32 +#define MAX_SEARCH 5000000 +#define MAX_SPECS 2 // how many special elements allowed +#define TOLMUL 1.0E7 +#define TOLDIV 10000000LL + + +int accepted = 0; +int skipped = 0; +int rejected[] = {0,0,0}; +int tried = 0; + +// List of weights to search +int64_t searchweights[MAX_SEARCH]; + +// Used to store results +typedef struct { + uint64_t orgsearch; + uint64_t diff; + int a[MAX_ELE]; + int idx; +} Result; +Result results[MAX_RESULTS]; +int last_result = 0; + + +// Dataformat in binary files +typedef struct { + uint64_t mass; + uint64_t formel; +} Moli; + + +// used to store binary file in memory +typedef struct { + int64_t last; // list length + Moli* a; // list of moli tuples +} Moli1; +Moli1* mols[MAX_MASS]; // array of lists to store mass and formula tuples + + +// Base definitions of elements +typedef struct { + char formel[10]; + uint64_t mass; + int valence; + // int defining group + // 2: Isotope replacing other C13 <-> C + // 1: Metal - used with -m or max 2 metals + int spec0; +} Element; + + +// struct for elements describing loop and other props +typedef struct { + char formel[10]; // Just String for formula, C13 + int cFrom; // Min number of elements in resuilt + int cTo; // Max Number in result + char* ref; // Refered Element for replacements (c13 <-> C) + int iref; // index of refered Element (calculated) + int64_t delta; // mass difference if added or replaced, so mass for added elements, C13-C for replacements + Element e; // reference to element +} Count; + + +Element elements[] = { + { "C" , 12000000000 , 4, 0 }, // 0 + { "H" , 1007825000 , 1, 0 }, // 1 + { "O" , 15994914619 , 2, 0 }, // 2 + { "N" , 14003074004 , 5, 0 }, // 3 + { "S" , 31972072000 , 6, 0 }, // 4 + { "P" , 30973763000 , 5, 0 }, // 5 + { "Cl" , 34968852700 , 1, 1 }, // 6 + { "Na" , 22989769280 , 1, 1 }, // 7 + { "C13" , 13003354835 , 4, 2 }, // 8 + { "O18" , 17999161000 , 2, 2 }, // 9 + { "N15" , 15000108899 , 5, 2 }, // 10 + { "S34" , 33967866900 , 6, 2 }, // 11 + { "Cl37" , 36965902590 , 1, 1 }, // 11 + { "Co" , 58933195000 , 2, 1 }, // 12 + { "Cu.i" , 62929597720 , 1, 1 }, // 13 + { "Cu.ii" , 62929597720 , 2, 1 }, // 14 + { "Cu65.i" , 64927789500 , 1, 1 }, // 15 + { "Cu65.ii", 64927789500 , 2, 1 }, // 16 + { "Fe.ii" , 55934936000 , 2, 1 }, // 17 + { "Fe.iii", 55934936000 , 3, 1 }, // 18 + { "Fe54.ii" , 53939609000 , 2, 1 }, // 19 + { "Fe54.iii", 53939609000 , 3, 1 }, // 20 + { "Br79" , 78918338000 , 1, 1 }, // 21 + { "Br81" , 80916900000 , 1, 1 }, // 22 + { "Ni.ii" , 57935346200 , 2, 1 }, // 23 + { "Ni60.ii" , 59930786400 , 2, 1 }, // 24 + { "Zn.ii" , 65380000000 , 2, 1 }, // 25 + { "Zn66.ii" , 65926033400 , 2, 1 }, // 26 + { "I" , 126904470000 , 1, 1 } }; // 27 +int NBR_ELEMENTS= sizeof(elements)/sizeof(Element); + +#define COUNTS_OFFSET 7 // index of first not pregenerated element + + +Count molstd_counts[] = { + { "C" , 0, 80, NULL }, + { "H" , 0, 400, NULL }, + { "O" , 0, 40, NULL }, + { "N" , 0, 30, NULL }, + { "S" , 0, 15, NULL }, + { "P" , 0, 20, NULL }, + { "Cl" , 0, 2, NULL }, +}; +int NBRSTD_COUNTS= sizeof(molstd_counts)/sizeof(Count); + +// All the counts with min and max for the additinal elements, missing cols added in init_alternatives +Count mol_counts[] = { + { "Na" , 0, 2, NULL }, + { "C13" , 0, 3, "C" }, + { "O18" , 0, 1, "O" }, + { "N15" , 0, 1, "N" }, + { "S34" , 0, 1, "S" }, + { "Cl37" , 0, 1, "Cl" }, + { "Co" , 0, 2, NULL }, + { "Cu.i" , 0, 2, NULL }, + { "Cu.ii" , 0, 2, NULL }, + { "Cu65.i" ,0, 2, "Cu.i" }, + { "Cu65.ii" ,0, 2, "Cu.ii" }, + { "Fe.ii" , 0, 2, NULL }, + { "Fe.iii" , 0, 2, NULL }, + { "Fe54.ii" ,0, 2, "Fe.ii" }, + { "Fe54.iii" ,0, 2, "Fe.iii" }, + { "Br79" , 0, 2, NULL }, + { "Br81" , 0, 2, NULL }, + { "Ni.ii" , 0, 2, NULL }, + { "Ni60.ii" ,0, 2, "Ni.ii" }, + { "Zn.ii" , 0, 2, NULL }, + { "Zn66.ii" ,0, 2, "Zn.ii" }, + { "I" , 0, 2, NULL }, +}; +int NBR_COUNTS= sizeof(mol_counts)/sizeof(Count); +int cur_mol[MAX_ELE - COUNTS_OFFSET]; // array with current element numbers +uint64_t cur_mol_nbr = 0; + +int DO_DEBUG = 0; + + +// ------------------------------------------------------------------------- +void print_table_results(); + + +// ------------------------------------------------------------------------- +// +char* aF2csvlist(char* f, int* aF) { + f[0]= '\0'; + for (int i=0; i0) { + m = m + aF[i]*elements[i].mass; + } + } + return m; +} + + +int val_aF(int* aF) { + int v= 0; + for (int i = 0; i < NBR_ELEMENTS; ++i) { + if (aF[i]>0) { + v = v + aF[i]*elements[i].valence; + } + } + return v; +} + + +double calc_hc(int* aF) { + return (double)aF[1] / (double)(aF[0]+aF[8]); +} + + +double calc_oc(int* aF) { + return (double)aF[2] / (double)(aF[0]+aF[8]); +} + + +int check_mol(int* aF) { + for (int i=0; iMAX_SPECS) { + rejected[2] += 1; + return 0; + } + + accepted += 1; + return 1; +} + + + +Moli1* read_binfile(char* infilename) { + FILE* fp; + int64_t vers= 0x000000000001L; + Moli1* m; + uint64_t cnt; + + m= (Moli1*)malloc(sizeof(Moli1)); + + if( (fp=fopen(infilename,"rb")) == NULL) { + m->last= 0; + m->a= NULL; + return m; + } + + cnt = fread(&(m->last), sizeof(int64_t), 1, fp); + if (cnt!=1) exit(2); + cnt = fread(&vers, sizeof(int64_t), 1, fp); + if (cnt!=1) exit(2); + m->a= (Moli*)malloc(sizeof(Moli1) * m->last); + cnt = fread(m->a, sizeof(Moli), m->last, fp); + if (cnt!=m->last) { + fprintf(stderr, "File %s, Expected %ld lines, found %ld\n", + infilename, m->last, cnt ); + exit(2); + } + if (DO_DEBUG) fprintf(stderr, " Read %s, nbr of entries:%ld\n", infilename, m->last); + + fclose(fp); + return m; +} + + +Moli1* read_filesfor(int idx, int64_t w) { + char fn[256]; + int iweight; + int cnt_cache = 0; + + iweight = weight2int(w); + // if (DO_DEBUG) fprintf(stderr, "read files for %ld (%d)\n", w, iweight); + + if (iweight<0 || iweight>MAX_MASS) { + fprintf(stderr, "Weight %ld out of range for %d\n", w, idx); + exit(9); + } + + if (mols[iweight] && mols[iweight]->last) { + return mols[iweight]; + } + + // Count already read lists + cnt_cache = 0; + for (int i=0; ilast) { + cnt_cache += 1; + } + } + if (cnt_cache>200 && iweight>5 && iweight190); i++) { + if (mols[i] && mols[i]->last) { + if (DO_DEBUG) fprintf(stderr, " Free %4d %ld\n", i, mols[i]->last); + free(mols[i]->a); + free(mols[i]); + mols[i]= NULL; + cnt_cache -= 1; + } + } + } + if (iweight>0 && iweight=MAX_RESULTS) { + print_table_results(); + } + results[last_result].idx = idx; + results[last_result].orgsearch = searchmass; + if (mass>searchmass) + results[last_result].diff = mass-searchmass; + else + results[last_result].diff = searchmass-mass; + + if (results[last_result].diff > 10000000) { + fprintf(stderr, "Diff zu gross %ld (%ld, %ld)\n", results[last_result].diff, mass, searchmass); + exit(9); + } + for (int i=0; i= target +uint64_t search_start_in_list(uint64_t target, Moli1* m, uint64_t first, uint64_t last) { + int test; + if (first+1 >= last) return first; + + if (target > m->a[first].mass) { + test = (first+last) / 2; + if (target >= m->a[test].mass) { + return search_start_in_list(target, m, test, last); + } else { + return search_start_in_list(target, m, first, test); + } + } else { + if (first==0 || target < m->a[first-1].mass) return first; + } +} + + +void search_weight_db(int idx, + uint64_t orgsearch, uint64_t searchweight, + uint64_t tolerance) { + int aF[MAX_ELE]; + int valid; + uint64_t i_mass; + uint64_t tol = orgsearch * tolerance / TOLDIV; + Moli1* m = read_filesfor(idx, searchweight); + + if (!m) { + fprintf(stderr, "No list found for %ld (%d)\n", searchweight, weight2int(searchweight)); + return; + } + + // search for first element larger org-tol + uint64_t target = searchweight - tol; + uint64_t start = search_start_in_list(target, m, 0, m->last); + // printf("search %ld - %ld\n", searchweight - tol, searchweight + tol); + // run + for (uint64_t i=start; i< m->last && m->a[i].mass < searchweight+tol; i++) { + if (labs(m->a[i].mass - searchweight) < tol) { + tried += 1; + int2aF(aF, m->a[i].formel); + // Check mol with current limits + valid = 1; + for (int j=0; jmolstd_counts[j].cTo)) { + // printf("skipped at j=%d, aF=%d\n", j, aF[j]); + valid = 0; + skipped += 1; + // activate only for detailed debugs, lots of output!!!! + if (0 && DO_DEBUG) { + char fsum[256]; + aF2formel(fsum, aF); + fprintf(stderr, "skip(at %s(j=%d), #=%d[%d,%d]) %s\n", + molstd_counts[j].formel, j, aF[j], molstd_counts[j].cFrom,molstd_counts[j].cTo, fsum); + } + } + } + if (valid) { + for (int j=0; j 11000000) { + fprintf(stderr, "Diff zwischen %ld %ld (sw=%ld, d=%ld (%lld))\n", i_mass, orgsearch, searchweight, i_mass-orgsearch, searchweight/1000000000LL); + } else { + push_result(idx, orgsearch, i_mass, aF); + } + } + } + } + } +} + + +char* specs2str(char* s) { + s[0] = '\0'; + + for (int i = 0; i < NBR_COUNTS; ++i) { + if (cur_mol[i]>0) + sprintf(s+strlen(s), "%s_%d ", mol_counts[i].e.formel, cur_mol[i]); + } + s[strlen(s)-1]='\0'; + return s; +} + +int count_specs() { + int nbr = 0; + + for (int i = 0; i < NBR_COUNTS; ++i) { + if (mol_counts[i].e.spec0 == 1) { + nbr+= cur_mol[i]; + } + } + return nbr; +} + + +uint64_t mass_specs() { + uint64_t m = 0; + for (int i = 0; i < NBR_COUNTS; ++i) { + m += cur_mol[i] * mol_counts[i].delta; + } + return m; +} + + +int next_specs() { + int nbr = count_specs(); + + for (int i = 0; i < NBR_COUNTS; ++i) { + if (cur_mol[i] < mol_counts[i].cTo && + (nbr < MAX_SPECS || + (nbr==MAX_SPECS && mol_counts[i].e.spec0 != 1))) { + cur_mol[i] += 1; + cur_mol_nbr+= 1; + return 1; + } else { + cur_mol[i] = mol_counts[i].cFrom; + nbr = count_specs(); + } + } + return 0; +} + + +void init_specs() { + for (int i = 0; i < NBR_COUNTS; ++i) { + cur_mol[i]= mol_counts[i].cFrom; + } + cur_mol_nbr = 0; +} + + +void search_weight(int idx, + uint64_t searchweight, + uint64_t tolerance) { + char s[256]; + uint64_t spec_mass; + init_specs(); + + search_weight_db(idx, searchweight, searchweight, tolerance); + while (next_specs()) { + spec_mass = mass_specs(); + // Minimale mass to look for: 12 + if (spec_mass+elements[0].mass0 && sv=MAX_SEARCH) { + fprintf(stderr, "Too many Masses %d", idx); + exit(2); + } + } + searchweights[idx]=0; + if (DO_DEBUG) { + fprintf(stderr, "READ %d SEARCHES\n", idx); + } + return idx; +} + + +void print_counts() { + for (int i=0; i