You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
161 lines
4.0 KiB
161 lines
4.0 KiB
// Searches for good delimiters to cut streams into relatively well-sized
// segments.
|
|
|
#include <stdio.h> |
|
#include <stdlib.h> |
|
#include <time.h> |
|
#include <sys/time.h> |
|
#include <boost/cstdint.hpp> |
|
#include <boost/array.hpp> |
|
#include <boost/random/mersenne_twister.hpp> |
|
#include <boost/thread.hpp> |
|
#include <boost/bind.hpp> |
|
#include <boost/shared_ptr.hpp> |
|
#include <iostream> |
|
#include <vector> |
|
#include <map> |
|
|
|
// Desired size range |
|
#define MIN_DESIRED_SIZE 4096 |
|
#define MAX_DESIRED_SIZE 131072 |
|
|
|
#define DELIMITER_SET_SIZE 1 |
|
typedef boost::array<boost::uint16_t,DELIMITER_SET_SIZE> DelimArray; |
|
|
|
struct BestEntry |
|
{ |
|
DelimArray best; |
|
double bestScore; |
|
std::vector<unsigned char> data; |
|
}; |
|
|
|
boost::mutex bestLock; |
|
boost::mutex outLock; |
|
std::map<std::string,BestEntry> best; |
|
|
|
static void runThread(const std::string &fileName) |
|
{ |
|
char tmp[4096]; |
|
|
|
boost::mt19937 prng; |
|
{ |
|
boost::uint32_t seed; |
|
FILE *ur = fopen("/dev/urandom","r"); |
|
fread((void *)&seed,1,sizeof(seed),ur); |
|
fclose(ur); |
|
prng.seed(seed); |
|
} |
|
|
|
BestEntry *myEntry; |
|
{ |
|
boost::mutex::scoped_lock l(bestLock); |
|
myEntry = &(best[fileName]); |
|
myEntry->bestScore = 99999999.0; |
|
} |
|
|
|
{ |
|
boost::mutex::scoped_lock l(outLock); |
|
|
|
std::cout << "*** Reading test data from: " << fileName << std::endl; |
|
FILE *f = fopen(fileName.c_str(),"r"); |
|
if (f) { |
|
int n; |
|
while ((n = fread((void *)tmp,1,sizeof(tmp),f)) > 0) { |
|
for(int i=0;i<n;++i) |
|
myEntry->data.push_back((unsigned char)tmp[i]); |
|
} |
|
fclose(f); |
|
} |
|
|
|
if (myEntry->data.size() <= 0) { |
|
std::cout << "Error: no data read." << std::endl; |
|
exit(1); |
|
} else std::cout << "*** Read " << myEntry->data.size() << " bytes of test data." << std::endl; |
|
|
|
std::cout.flush(); |
|
} |
|
|
|
DelimArray current; |
|
for(unsigned int i=0;i<DELIMITER_SET_SIZE;++i) |
|
current[i] = (boost::uint16_t)prng(); |
|
|
|
for(;;) { |
|
unsigned long numTooShort = 0; |
|
unsigned long numTooLong = 0; |
|
unsigned long numGood = 0; |
|
|
|
boost::uint32_t shiftRegister = 0; |
|
unsigned long segSize = 0; |
|
for(std::vector<unsigned char>::iterator i=myEntry->data.begin();i!=myEntry->data.end();++i) { |
|
shiftRegister <<= 1; |
|
shiftRegister |= (((boost::uint32_t)*i) & 1); |
|
|
|
++segSize; |
|
|
|
boost::uint16_t transformedShiftRegister = (boost::uint16_t)(shiftRegister); |
|
|
|
for(DelimArray::iterator d=current.begin();d!=current.end();++d) { |
|
if (transformedShiftRegister == *d) { |
|
if (segSize < MIN_DESIRED_SIZE) |
|
++numTooShort; |
|
else if (segSize > MAX_DESIRED_SIZE) |
|
++numTooLong; |
|
else ++numGood; |
|
segSize = 0; |
|
break; |
|
} |
|
} |
|
} |
|
if (segSize) { |
|
if (segSize < MIN_DESIRED_SIZE) |
|
++numTooShort; |
|
else if (segSize > MAX_DESIRED_SIZE) |
|
++numTooLong; |
|
else ++numGood; |
|
} |
|
|
|
if (numGood) { |
|
double score = ((double)(numTooShort + numTooLong)) / ((double)numGood); |
|
|
|
if (score < myEntry->bestScore) { |
|
myEntry->best = current; |
|
myEntry->bestScore = score; |
|
|
|
boost::mutex::scoped_lock l(outLock); |
|
|
|
std::cout << fileName << ": "; |
|
|
|
for(DelimArray::iterator d=current.begin();d!=current.end();++d) { |
|
sprintf(tmp,"0x%.4x",(unsigned int)*d); |
|
if (d != current.begin()) |
|
std::cout << ','; |
|
std::cout << tmp; |
|
} |
|
|
|
std::cout << ": " << numTooShort << " / " << numGood << " / " << numTooLong << " (" << score << ")" << std::endl; |
|
std::cout.flush(); |
|
|
|
if ((numTooShort == 0)&&(numTooLong == 0)) |
|
break; |
|
} |
|
} |
|
|
|
for(DelimArray::iterator i=current.begin();i!=current.end();++i) |
|
*i = (boost::uint16_t)prng(); |
|
} |
|
} |
|
|
|
int main(int argc,char **argv) |
|
{ |
|
std::vector< boost::shared_ptr<boost::thread> > threads; |
|
|
|
for(int i=1;i<argc;++i) { |
|
boost::shared_ptr<boost::thread> t(new boost::thread(boost::bind(&runThread,std::string(argv[i])))); |
|
threads.push_back(t); |
|
} |
|
|
|
for(std::vector< boost::shared_ptr<boost::thread> >::iterator i=threads.begin();i!=threads.end();++i) |
|
(*i)->join(); |
|
|
|
return 0; |
|
}
|
|
|