Merge pull request #1257 from jennifermliu/fastCover

Increment frequency for every dmer occurrence within same sample
This commit is contained in:
Nick Terrell 2018-07-30 16:03:18 -07:00 committed by GitHub
commit fc685d6b94
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 101 additions and 109 deletions

View File

@ -18,109 +18,109 @@ make ARG="in=../../../lib/dictBuilder in=../../../lib/compress"
- Fourth column is chosen d and fifth column is chosen k
github:
NODICT 0.000005 2.999642
RANDOM 0.141553 8.786957
LEGACY 0.904340 8.989482
COVER 53.621302 10.641263 8 1298
COVER 4.085037 10.641263 8 1298
FAST15 17.636211 10.586461 8 1778
FAST15 0.221236 10.586461 8 1778
FAST16 18.716259 10.492503 6 1778
FAST16 0.251522 10.492503 6 1778
FAST17 17.614391 10.611737 8 1778
FAST17 0.241011 10.611737 8 1778
FAST18 19.926270 10.621586 8 1778
FAST18 0.287195 10.621586 8 1778
FAST19 19.626808 10.629626 8 1778
FAST19 0.340191 10.629626 8 1778
FAST20 18.918657 10.610308 8 1778
FAST20 0.463307 10.610308 8 1778
FAST21 20.502362 10.625733 8 1778
FAST21 0.638202 10.625733 8 1778
FAST22 22.702695 10.625281 8 1778
FAST22 1.353399 10.625281 8 1778
FAST23 28.041990 10.602342 8 1778
FAST23 3.029502 10.602342 8 1778
FAST24 35.662961 10.603379 8 1778
FAST24 6.524258 10.603379 8 1778
NODICT 0.000004 2.999642
RANDOM 0.161907 8.786957
LEGACY 0.960128 8.989482
COVER 69.031037 10.641263 8 1298
COVER 7.017782 10.641263 8 1298
FAST15 24.710713 10.547583 8 1874
FAST15 0.271657 10.547583 8 1874
FAST16 23.906902 10.690723 8 1106
FAST16 0.315039 10.690723 8 1106
FAST17 25.384572 10.642322 8 1106
FAST17 0.319237 10.642322 8 1106
FAST18 21.935494 10.491283 8 1826
FAST18 0.255488 10.491283 8 1826
FAST19 21.349385 10.522182 8 1826
FAST19 0.311369 10.522182 8 1826
FAST20 23.124955 10.487431 8 1826
FAST20 0.317411 10.487431 8 1826
FAST21 27.311387 10.491047 8 1778
FAST21 0.398483 10.491047 8 1778
FAST22 23.993620 10.502191 8 1826
FAST22 0.329767 10.502191 8 1826
FAST23 27.793381 10.502191 8 1826
FAST23 0.359659 10.502191 8 1826
FAST24 29.281399 10.509461 8 1826
FAST24 0.398369 10.509461 8 1826
hg-commands:
NODICT 0.000005 2.425291
RANDOM 0.080469 3.489515
LEGACY 0.794417 3.911896
COVER 54.198788 4.131136 8 386
COVER 2.191729 4.131136 8 386
FAST15 11.852793 3.903719 6 1106
FAST15 0.175406 3.903719 6 1106
FAST16 12.863315 4.005077 8 530
FAST16 0.158410 4.005077 8 530
FAST17 11.977917 4.097811 8 818
FAST17 0.162381 4.097811 8 818
FAST18 11.749304 4.136081 8 770
FAST18 0.173242 4.136081 8 770
FAST19 11.905785 4.166021 8 530
FAST19 0.186403 4.166021 8 530
FAST20 13.293999 4.163740 8 482
FAST20 0.241508 4.163740 8 482
FAST21 16.623177 4.157057 8 434
FAST21 0.372647 4.157057 8 434
FAST22 20.918409 4.158195 8 290
FAST22 0.570431 4.158195 8 290
FAST23 21.762805 4.161450 8 434
FAST23 1.162206 4.161450 8 434
FAST24 29.133745 4.159658 8 338
FAST24 3.054376 4.159658 8 338
NODICT 0.000007 2.425291
RANDOM 0.083477 3.489515
LEGACY 0.941867 3.911896
COVER 67.314295 4.131136 8 386
COVER 2.757895 4.131136 8 386
FAST15 13.466983 3.920128 6 1106
FAST15 0.162656 3.920128 6 1106
FAST16 12.618110 4.032422 8 674
FAST16 0.159073 4.032422 8 674
FAST17 12.883772 4.063581 8 1490
FAST17 0.183131 4.063581 8 1490
FAST18 13.904432 4.085034 8 290
FAST18 0.161078 4.085034 8 290
FAST19 13.762269 4.097054 8 578
FAST19 0.179906 4.097054 8 578
FAST20 15.303927 4.101575 8 434
FAST20 0.213146 4.101575 8 434
FAST21 19.619482 4.104879 8 530
FAST21 0.289158 4.104879 8 530
FAST22 23.187937 4.102448 8 530
FAST22 0.335220 4.102448 8 530
FAST23 24.946655 4.095162 8 914
FAST23 0.396927 4.095162 8 914
FAST24 27.634065 4.114624 8 722
FAST24 0.434278 4.114624 8 722
hg-changelog:
NODICT 0.000006 1.377613
RANDOM 0.601346 2.096785
LEGACY 2.544973 2.058273
COVER 222.639708 2.188654 8 98
COVER 6.072892 2.188654 8 98
FAST15 70.394523 2.127194 8 866
FAST15 0.899766 2.127194 8 866
FAST16 69.845529 2.145401 8 338
FAST16 0.881569 2.145401 8 338
FAST17 69.382431 2.157544 8 194
FAST17 0.943291 2.157544 8 194
FAST18 71.348283 2.173127 8 98
FAST18 1.034765 2.173127 8 98
FAST19 71.380923 2.179527 8 98
FAST19 1.254700 2.179527 8 98
FAST20 72.802714 2.183233 6 98
FAST20 1.368704 2.183233 6 98
FAST21 82.042339 2.180920 8 98
FAST21 2.213864 2.180920 8 98
FAST22 90.666200 2.184297 8 98
FAST22 3.590399 2.184297 8 98
FAST23 108.926377 2.187666 6 98
FAST23 8.723759 2.187666 6 98
FAST24 134.296232 2.189889 6 98
FAST24 19.396532 2.189889 6 98
NODICT 0.000027 1.377613
RANDOM 0.676272 2.096785
LEGACY 2.871887 2.058273
COVER 226.371004 2.188654 8 98
COVER 5.359820 2.188654 8 98
FAST15 66.776425 2.130548 6 386
FAST15 0.796836 2.130548 6 386
FAST16 64.405113 2.144136 8 194
FAST16 0.778969 2.144136 8 194
FAST17 65.062292 2.155745 8 98
FAST17 0.822089 2.155745 8 98
FAST18 65.819104 2.172062 6 98
FAST18 0.804247 2.172062 6 98
FAST19 66.184016 2.179446 6 98
FAST19 0.883526 2.179446 6 98
FAST20 72.900924 2.187017 6 98
FAST20 0.908220 2.187017 6 98
FAST21 77.869945 2.183583 6 146
FAST21 0.932666 2.183583 6 146
FAST22 84.041413 2.182030 6 98
FAST22 1.092310 2.182030 6 98
FAST23 89.539265 2.185291 8 98
FAST23 1.294779 2.185291 8 98
FAST24 97.193482 2.184939 6 98
FAST24 1.270493 2.184939 6 98
hg-manifest:
NODICT 0.000005 1.866385
RANDOM 0.982192 2.309485
LEGACY 9.507729 2.506775
COVER 922.742066 2.582597 8 434
COVER 36.500276 2.582597 8 434
FAST15 163.886717 2.377689 8 1682
FAST15 2.107328 2.377689 8 1682
FAST16 152.684592 2.464814 8 1538
FAST16 2.157789 2.464814 8 1538
FAST17 154.463459 2.539834 6 1826
FAST17 2.282455 2.539834 6 1826
FAST18 155.540044 2.576924 8 1922
FAST18 2.101807 2.576924 8 1922
FAST19 152.650343 2.592479 6 290
FAST19 2.359461 2.592479 6 290
FAST20 174.623634 2.594551 8 194
FAST20 2.870022 2.594551 8 194
FAST21 219.876653 2.597128 6 194
FAST21 4.386269 2.597128 6 194
FAST22 247.986803 2.596971 6 386
FAST22 6.201144 2.596971 6 386
FAST23 276.051806 2.601416 8 194
FAST23 11.613477 2.601416 8 194
FAST24 328.234024 2.602830 6 194
FAST24 26.710364 2.602830 6 194
NODICT 0.000004 1.866385
RANDOM 0.969045 2.309485
LEGACY 8.849052 2.506775
COVER 905.855524 2.582597 8 434
COVER 34.951973 2.582597 8 434
FAST15 154.816926 2.391764 6 1826
FAST15 1.932845 2.391764 6 1826
FAST16 142.197120 2.480738 6 1922
FAST16 1.759330 2.480738 6 1922
FAST17 147.276099 2.548313 6 1682
FAST17 1.819175 2.548313 6 1682
FAST18 164.543366 2.567448 6 386
FAST18 2.728845 2.567448 6 386
FAST19 195.670852 2.581170 8 338
FAST19 2.439487 2.581170 8 338
FAST20 195.716408 2.587062 6 194
FAST20 2.056303 2.587062 6 194
FAST21 211.483191 2.590136 6 242
FAST21 2.983587 2.590136 6 242
FAST22 239.562966 2.591033 6 194
FAST22 3.355746 2.591033 6 194
FAST23 264.547195 2.590403 8 434
FAST23 3.667851 2.590403 8 434
FAST24 296.258379 2.591723 6 290
FAST24 3.858688 2.591723 6 290

View File

@ -266,25 +266,17 @@ static void FASTCOVER_ctx_destroy(FASTCOVER_ctx_t *ctx) {
* Calculate the frequency of the hash value of each dmer in ctx->samples
*/
static void FASTCOVER_computeFrequency(U32 *freqs, unsigned f, FASTCOVER_ctx_t *ctx){
  /* Counts every dmer occurrence: for each training sample, slide a window
   * of ctx->d bytes over it one byte at a time, hash the window with
   * FASTCOVER_hashPtrToIndex(), and increment the matching slot of freqs.
   *
   * freqs : table of counters indexed by the hash value; counts are added
   *         to whatever is already stored (the table is not cleared here).
   *         NOTE(review): presumably holds (1 << f) entries, zeroed by the
   *         caller - confirm against the call site.
   * f     : forwarded unchanged to FASTCOVER_hashPtrToIndex().
   * ctx   : supplies the sample bytes (samples), the sample boundaries
   *         (offsets[i] .. offsets[i+1]), the number of samples to scan
   *         (nbTrainSamples) and the dmer length (d).
   *
   * A hash value that repeats inside one sample is counted once per
   * occurrence, not once per sample, so no per-sample "already seen"
   * scratch table is needed.
   */
  size_t start; /* start of current dmer */
  for (unsigned i = 0; i < ctx->nbTrainSamples; i++) {
    const size_t currSampleStart = ctx->offsets[i];
    const size_t currSampleEnd = ctx->offsets[i+1];
    start = currSampleStart;
    /* Stop once fewer than d bytes remain, so a dmer never crosses
     * the end of the current sample. */
    while (start + ctx->d <= currSampleEnd) {
      const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, ctx->d);
      freqs[dmerIndex]++;
      start++;
    }
  }
}
/**