/*
 * Decompiled with CFR 0.152.
 */
package org.commoncrawl.util.shared;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.CharBuffer;
import java.util.Iterator;
import java.util.Set;
import org.commoncrawl.util.shared.FPGenerator;
import org.commoncrawl.util.shared.LongOpenHashSet;
import org.commoncrawl.util.shared.MurmurHash;
import org.commoncrawl.util.shared.Shingle;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Static helpers for computing 64-bit SimHash fingerprints and comparing them.
 *
 * <p>Every fingerprint is built the same way: each distinct shingle is hashed
 * to 64 bits with {@code FPGenerator.std64}, each hash bit casts a +1 (bit set)
 * or -1 (bit clear) vote for its position, and the result has a bit set
 * wherever the vote total is strictly positive.
 */
public class SimHash {
    /** Number of bits in a fingerprint. */
    public static final int HASH_SIZE = 64;
    /** Not referenced within this class; kept for existing callers. */
    public static final long HASH_RANGE = 66L;
    /** Not referenced within this class; kept for existing callers. */
    public static MurmurHash hasher = new MurmurHash();
    /** Bytes per shingle in {@link #computeOptimizedSimHashForBytes}. */
    private static final int FIXED_BGRAM_LENGTH = 8;
    /** Chars per shingle in {@link #computeOptimizedSimHashForString(CharBuffer)}. */
    private static final int FIXED_CGRAM_LENGTH = 4;

    /**
     * Convenience overload that wraps {@code s} in a {@link CharBuffer} and
     * delegates to {@link #computeOptimizedSimHashForString(CharBuffer)}.
     *
     * @param s text to fingerprint
     * @return the 64-bit SimHash of {@code s}
     */
    public static long computeOptimizedSimHashForString(String s) {
        return computeOptimizedSimHashForString(CharBuffer.wrap(s));
    }

    /**
     * Fingerprints text using overlapping 4-char shingles, each packed into
     * one {@code long} (16 bits per char, first char in the top bits).
     *
     * <p>Input shorter than four chars produces no shingles.
     *
     * @param s text to fingerprint; indices are relative to the buffer's
     *          current position, as defined by {@link CharBuffer#charAt(int)}
     * @return the 64-bit SimHash of the distinct shingles
     */
    public static long computeOptimizedSimHashForString(CharBuffer s) {
        int length = s.length();
        LongOpenHashSet shingles = new LongOpenHashSet(Math.min(length, 100000));
        for (int start = 0; start + FIXED_CGRAM_LENGTH <= length; ++start) {
            long shingle = 0L;
            for (int k = 0; k < FIXED_CGRAM_LENGTH; ++k) {
                // char widens without sign extension, so each char owns its 16 bits.
                shingle = (shingle << 16) | s.charAt(start + k);
            }
            shingles.add(shingle);
        }
        return fingerprintShingles(shingles);
    }

    /**
     * Fingerprints a byte range using overlapping 8-byte shingles, each packed
     * into one {@code long} (first byte in the top bits).
     *
     * <p>The range covered is {@code data[offset .. offset + length - 1]}.
     * A range shorter than eight bytes produces no shingles.
     *
     * @param data   source bytes
     * @param offset index of the first byte to include
     * @param length number of bytes to include, starting at {@code offset}
     * @return the 64-bit SimHash of the distinct shingles
     */
    public static long computeOptimizedSimHashForBytes(byte[] data, int offset, int length) {
        LongOpenHashSet shingles = new LongOpenHashSet(Math.min(length / FIXED_BGRAM_LENGTH, 100000));
        // The last shingle must end at offset + length - 1. The bound has to
        // include offset, otherwise a non-zero offset drops trailing shingles.
        int lastStart = offset + length - FIXED_BGRAM_LENGTH;
        for (int start = offset; start <= lastStart; ++start) {
            long shingle = data[start];
            for (int k = 1; k < FIXED_BGRAM_LENGTH; ++k) {
                // NOTE(review): bytes are widened with sign extension, so a byte
                // >= 0x80 sets every higher bit of the shingle. Masking with 0xFF
                // would keep all eight bytes distinct but would change the
                // fingerprints of existing inputs, so it is left as is.
                shingle = (shingle << 8) | (long) data[start + k];
            }
            shingles.add(shingle);
        }
        return fingerprintShingles(shingles);
    }

    /**
     * Fingerprints a set of pre-computed string shingles.
     *
     * @param shingles distinct shingles of a document
     * @return the 64-bit SimHash of the shingles
     */
    public static long computeSimHashFromString(Set<String> shingles) {
        int[] votes = new int[HASH_SIZE];
        for (String shingle : shingles) {
            // NOTE(review): getBytes() uses the platform default charset, so the
            // result for non-ASCII shingles can differ between machines.
            byte[] bytes = shingle.getBytes();
            addVotes(votes, FPGenerator.std64.fp(bytes, 0, bytes.length));
        }
        return collapseVotes(votes);
    }

    /**
     * Counts the bit positions at which two fingerprints differ.
     *
     * @param hash1 first fingerprint
     * @param hash2 second fingerprint
     * @return number of differing bits, from 0 to 64
     */
    public static int hammingDistance(long hash1, long hash2) {
        return Long.bitCount(hash1 ^ hash2);
    }

    /**
     * Rotates a fingerprint left by one bit; the top bit wraps to bit 0.
     *
     * @param hashValue value to rotate
     * @return {@code hashValue} rotated left by one position
     */
    public static long rotate(long hashValue) {
        return Long.rotateLeft(hashValue, 1);
    }

    /**
     * Command-line timing comparison of the set-of-strings fingerprint and the
     * byte-shingle fingerprint on two files, repeated 100 times.
     *
     * @param args {@code args[0]} and {@code args[1]} are the paths of the two
     *             documents to compare
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.err.println("Usage: SimHash <fileA> <fileB>");
            return;
        }
        try {
            byte[] data1 = readFully(new File(args[0]));
            byte[] data2 = readFully(new File(args[1]));
            String string1 = new String(data1);
            String string2 = new String(data2);
            for (int i = 0; i < 100; ++i) {
                long timeStart = System.currentTimeMillis();
                long simhash1 = computeSimHashFromString(Shingle.shingles(string1));
                long timeEnd = System.currentTimeMillis();
                System.out.println("Old Calc for Document A Took:" + (timeEnd - timeStart));

                timeStart = System.currentTimeMillis();
                long simhash2 = computeSimHashFromString(Shingle.shingles(string2));
                timeEnd = System.currentTimeMillis();
                System.out.println("Old Calc for Document B Took:" + (timeEnd - timeStart));

                timeStart = System.currentTimeMillis();
                long simhash3 = computeOptimizedSimHashForBytes(data1, 0, data1.length);
                timeEnd = System.currentTimeMillis();
                System.out.println("New Calc for Document A Took:" + (timeEnd - timeStart));

                timeStart = System.currentTimeMillis();
                long simhash4 = computeOptimizedSimHashForBytes(data2, 0, data2.length);
                timeEnd = System.currentTimeMillis();
                System.out.println("New Calc for Document B Took:" + (timeEnd - timeStart));

                int hammingDistance = hammingDistance(simhash1, simhash2);
                int hammingDistance2 = hammingDistance(simhash3, simhash4);
                System.out.println("hammingdistance Doc (A) to Doc(B) OldWay:" + hammingDistance);
                System.out.println("hammingdistance Doc (A) to Doc(B) NewWay:" + hammingDistance2);
            }
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Hashes every packed shingle in the set and returns the resulting fingerprint. */
    private static long fingerprintShingles(LongOpenHashSet shingles) {
        int[] votes = new int[HASH_SIZE];
        byte[] bigEndian = new byte[8];
        Iterator<?> it = shingles.iterator();
        while (it.hasNext()) {
            long shingle = (Long) it.next();
            // Serialise most-significant byte first before hashing.
            for (int b = 0; b < bigEndian.length; ++b) {
                bigEndian[b] = (byte) (shingle >> (56 - 8 * b));
            }
            addVotes(votes, FPGenerator.std64.fp(bigEndian, 0, bigEndian.length));
        }
        return collapseVotes(votes);
    }

    /** Adds +1 to each position whose bit is set in {@code hash} and -1 to every other position. */
    private static void addVotes(int[] votes, long hash) {
        for (int bit = 0; bit < HASH_SIZE; ++bit) {
            votes[bit] += ((hash >>> bit) & 1L) == 1L ? 1 : -1;
        }
    }

    /** Builds the fingerprint: bit {@code i} is set iff {@code votes[i]} is strictly positive. */
    private static long collapseVotes(int[] votes) {
        long simhash = 0L;
        for (int bit = 0; bit < HASH_SIZE; ++bit) {
            if (votes[bit] > 0) {
                simhash |= 1L << bit;
            }
        }
        return simhash;
    }

    /** Reads the whole file into a new array, looping over short reads, and always closes the stream. */
    private static byte[] readFully(File file) throws IOException {
        long size = file.length();
        if (size > Integer.MAX_VALUE) {
            throw new IOException("File too large to load into memory: " + file);
        }
        byte[] data = new byte[(int) size];
        FileInputStream in = new FileInputStream(file);
        try {
            int filled = 0;
            while (filled < data.length) {
                int n = in.read(data, filled, data.length - filled);
                if (n < 0) {
                    throw new IOException("Unexpected end of file after " + filled
                            + " of " + data.length + " bytes: " + file);
                }
                filled += n;
            }
        }
        finally {
            in.close();
        }
        return data;
    }
}

