import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

import java.io.IOException;
import java.util.Random;

/**
 * Compares performance of Guava Interner and our FALF (fixed array, lock free)
 * interner. To use the FALF interner, run the main method with a single 'true'
 * command line argument. Otherwise, the Guava Weak Interner will be used.
 *
 * This benchmark simulates a multithreaded application that keeps generating
 * strings and retains in memory a fixed number of most recently generated
 * strings. The generated strings are random, with Gaussian distribution.
 * That simulates a situation that occurs frequently in real life, where strings
 * (or, generally, any objects) with certain values, for example "true", "yes"
 * etc. are replicated a very large number of times, whereas for many others
 * only one or two copies exist.
 *
 * I ran this benchmark on JDK 1.8.0_151, on MacBook Pro with 2.5 GHz
 * Intel Core i7 CPU and 16GB of RAM. Average time from 3 runs was taken. On
 * the last run, used heap size was measured using jmap -histo:live
 *
 * Guava: average time 10221 ms, used heap 14,971KB
 * FALF: average time 8520 ms, used heap 564KB
 *
 * A huge difference in used heap is explained by the fact that the Guava
 * interner stores in memory each distinct object given to it. The weak
 * references help it to eventually get rid of objects that are no longer
 * in use by the app (not referenced strongly from anywhere), but still the
 * number and size of its internal implementation objects remain very large.
 * In contrast, the FALF interner's implementation has a small, constant
 * size, and its design guarantees that most of the objects that it keeps
 * in memory are those that would otherwise have the maximum number of
 * duplicates.
 *
 * @author Mikhail Dmitriev
 */
public class InternerPerfTest {

  // ---- Workload parameters ----

  // Number of worker threads started by run().
  private static final int N_THREADS = 8;
  // Scale factor applied to |Gaussian| when a worker picks the next string value.
  private static final int N_DISTINCT_OBJS = 100_000;
  // Size of the ring of most recently interned strings held by each worker.
  private static final int N_RETAINED_OBJS_PER_THREAD = 50_000;
  // Number of strings each worker generates and interns.
  private static final int N_ITERATIONS = 20_000_000;

  // ---- Interner under test ----

  // Selects which of the two interner fields below intern() delegates to.
  private boolean useFALF;
  private Interner<String> guavaInterner;
  private FALFInterner<String> falfInterner;

  /**
   * Runs the benchmark once and prints the elapsed time.
   *
   * @param args a single argument {@code "true"} selects the FALF interner;
   *     anything else (including no arguments) selects the Guava weak interner
   * @throws IOException if reading from standard input fails while waiting
   *     for the user before exit
   */
  public static void main(String[] args) throws IOException {
    boolean useFALF = args.length == 1 && "true".equals(args[0]);
    if (useFALF) {
      System.out.println("Using fixed-array, lock-free interner.");
    } else {
      System.out.println("Using Guava WeakInterner");
    }

    InternerPerfTest test = new InternerPerfTest(useFALF);

    // System.nanoTime() is the monotonic clock intended for measuring elapsed
    // time; System.currentTimeMillis() follows the wall clock, which may be
    // adjusted while the benchmark is running.
    long startNanos = System.nanoTime();
    test.run();
    long time = (System.nanoTime() - startNanos) / 1_000_000L;
    System.out.println("Test complete in " + time + " ms");

    // Here we give the user time to measure used heap size of the app.
    // It can be done, for example, using 'jmap -histo:live <JVM pid>' command.
    System.out.println("Press any key to exit the test...");
    System.in.read();

    System.out.println(test);  // Prevent the JVM from GCing this object early
  }

  /**
   * Creates the benchmark and instantiates the interner it will exercise.
   *
   * @param useFALF {@code true} to use a {@link FALFInterner} created with an
   *     expected capacity of 2001; {@code false} to use a Guava weak interner
   */
  InternerPerfTest(boolean useFALF) {
    this.useFALF = useFALF;
    if (useFALF) {
      // Diamond instead of the raw type, so the assignment is type-checked.
      falfInterner = new FALFInterner<>(2001);
    } else {
      guavaInterner = Interners.newWeakInterner();
    }
  }

  /**
   * Starts {@code N_THREADS} workers and blocks until all of them finish.
   *
   * @throws RuntimeException wrapping the {@link InterruptedException} if the
   *     calling thread is interrupted while waiting for a worker; the
   *     thread's interrupt status is restored before the exception is thrown
   */
  void run() {
    Worker[] workers = new Worker[N_THREADS];
    for (int i = 0; i < workers.length; i++) {
      workers[i] = new Worker();
      workers[i].start();
    }

    for (Worker worker : workers) {
      try {
        worker.join();
      } catch (InterruptedException e) {
        // Catching InterruptedException clears the interrupt flag; restore it
        // so code further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
      }
    }
  }

  /**
   * Interns the given string with whichever interner this test was
   * configured to use.
   *
   * @param s the string to intern
   * @return the value returned by the selected interner for {@code s}
   */
  String intern(String s) {
    return useFALF ? falfInterner.intern(s) : guavaInterner.intern(s);
  }

  /**
   * A benchmark thread that generates {@code N_ITERATIONS} strings, interns
   * each one through the enclosing test, and keeps the most recent
   * {@code N_RETAINED_OBJS_PER_THREAD} results in a ring buffer.
   */
  class Worker extends Thread {

    // Ring buffer of the most recently interned strings; slots are overwritten
    // cyclically as the iteration counter advances.
    final String[] retainedData = new String[N_RETAINED_OBJS_PER_THREAD];
    // Each worker owns its own generator instance.
    final Random randGen = new Random();

    @Override
    public void run() {
      for (int i = 0; i < N_ITERATIONS; i++) {
        // Generate strings with random values that obey Gaussian distribution.
        // That results in some strings occurring much more frequently than others.
        // That's what most often happens in real life.
        int n = (int) (Math.abs(randGen.nextGaussian()) * N_DISTINCT_OBJS);
        String s = n + " some senseless string contents " + n;
        retainedData[i % N_RETAINED_OBJS_PER_THREAD] = intern(s);
      }
    }
  }

  /**
   * Fixed array, lock free object interner.
   *
   * <p>Candidates are kept in a single array whose length is a power of two
   * and never changes after construction. Each object maps to exactly one
   * slot; on a miss the slot's previous occupant is simply overwritten, so
   * the interner never holds more objects than it has slots.
   *
   * <p>The array is read and written without locks or other synchronization.
   * NOTE(review): two threads racing on the same slot may each get back
   * their own instance; that appears to be an accepted trade-off of the
   * design rather than a bug - confirm before relying on stronger guarantees.
   *
   * @param <T> type of the interned objects; lookups rely on its
   *     {@code equals} and {@code hashCode}
   */
  static class FALFInterner<T> {

    /** Upper bound on the number of slots in the cache array. */
    static final int MAXIMUM_CAPACITY = 1 << 30;

    // Never reassigned after construction, hence final.
    private final Object[] cache;

    /**
     * Creates an interner whose slot count is {@code expectedCapacity}
     * rounded up to a power of two (see {@link #tableSizeFor(int)}).
     *
     * @param expectedCapacity desired number of slots
     */
    FALFInterner(int expectedCapacity) {
      cache = new Object[tableSizeFor(expectedCapacity)];
    }

    /**
     * Returns the cached instance equal to {@code obj} if the slot that
     * {@code obj} maps to currently holds one; otherwise stores {@code obj}
     * in that slot, replacing whatever was there, and returns {@code obj}.
     *
     * @param obj the object to intern; {@code null} maps to slot 0 and is
     *     returned as {@code null}
     * @return an object equal to {@code obj}, or {@code null} for {@code null}
     */
    T intern(T obj) {
      // The length is a power of two, so the mask is equivalent to a modulo.
      int slot = hash(obj) & (cache.length - 1);
      // Only values of type T are ever stored into the array by this method.
      @SuppressWarnings("unchecked")
      T cachedObj = (T) cache[slot];
      if (cachedObj != null && cachedObj.equals(obj)) {
        return cachedObj;
      }
      cache[slot] = obj;
      return obj;
    }

    /**
     * Spreads the higher bits of the key's hash code into the lower ones.
     * Copied from java.util.HashMap.
     *
     * @param key the object to hash, may be {@code null}
     * @return 0 for {@code null}, otherwise {@code hashCode() ^ (hashCode() >>> 16)}
     */
    static int hash(Object key) {
      int h;
      return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16);
    }

    /**
     * Returns a power of two size for the given target capacity.
     * Copied from java.util.HashMap.
     *
     * @param cap requested capacity
     * @return the smallest power of two that is {@code >= cap}, clamped to
     *     the range {@code [1, MAXIMUM_CAPACITY]}
     */
    static int tableSizeFor(int cap) {
      // Smear the highest set bit of (cap - 1) into every lower position,
      // producing a value of the form 2^k - 1.
      int n = cap - 1;
      n |= n >>> 1;
      n |= n >>> 2;
      n |= n >>> 4;
      n |= n >>> 8;
      n |= n >>> 16;
      return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1;
    }
  }
}
