0
点赞
收藏
分享

微信扫一扫

LintCode 大数据专项题集

洛茄 2022-05-10 阅读 43


499 · 单词计数 (Map Reduce版本)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
*/
/**
 * Word count expressed as a map/reduce pair.
 *
 * <p>{@code Map} emits {@code (word, 1)} for every word occurrence and
 * {@code Reduce} adds up the values collected for each word.
 */
public class WordCount {

    /** Map phase: tokenizes one chunk of text into words. */
    public static class Map {
        /**
         * Emits {@code (word, 1)} for each whitespace-separated word in {@code value}.
         *
         * @param key    identifier of the input chunk; not used
         * @param value  text to tokenize; {@code null}, empty or blank text emits nothing
         * @param output collector receiving one {@code (word, 1)} pair per word occurrence
         */
        public void map(String key, String value, OutputCollector<String, Integer> output) {
            if (value == null) {
                return;
            }
            // Split on runs of whitespace so repeated separators do not create
            // empty "words". split() still yields one empty token for leading
            // whitespace or empty input, hence the isEmpty() filter.
            for (String word : value.split("\\s+")) {
                if (!word.isEmpty()) {
                    output.collect(word, 1);
                }
            }
        }
    }

    /** Reduce phase: sums the counts gathered for a single word. */
    public static class Reduce {
        /**
         * Emits {@code (key, total)} where {@code total} is the sum of {@code values}.
         *
         * @param key    the word being counted
         * @param values partial counts for {@code key}
         * @param output collector receiving the single {@code (key, total)} pair
         */
        public void reduce(String key, Iterator<Integer> values,
                           OutputCollector<String, Integer> output) {
            int total = 0;
            while (values.hasNext()) {
                total += values.next();
            }
            output.collect(key, total);
        }
    }
}

503 · 乱序字符串 (Map Reduce版本)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
*/
/**
 * Groups words that are anagrams of each other using a map/reduce pair.
 *
 * <p>{@code Map} keys every word by its sorted characters; {@code Reduce}
 * gathers all words sharing that key into one list.
 */
public class Anagram {

    /** Map phase: emits {@code (sortedCharacters, word)} for each word. */
    public static class Map {
        /**
         * Tokenizes {@code value} on whitespace and emits each word under the key
         * formed by sorting the word's characters.
         *
         * @param key    identifier of the input chunk; not used
         * @param value  text to tokenize; {@code null}, empty or blank text emits nothing
         * @param output collector receiving {@code (sortedCharacters, word)} pairs
         */
        public void map(String key, String value,
                        OutputCollector<String, String> output) {
            if (value == null) {
                return;
            }
            // Splitting on whitespace runs and dropping empty tokens prevents a
            // spurious group keyed by "" when the text has repeated or leading
            // separators.
            for (String word : value.split("\\s+")) {
                if (word.isEmpty()) {
                    continue;
                }
                char[] letters = word.toCharArray();
                Arrays.sort(letters);
                output.collect(String.valueOf(letters), word);
            }
        }
    }

    /** Reduce phase: collects every word that shares one sorted-character key. */
    public static class Reduce {
        /**
         * Emits {@code (key, words)} where {@code words} holds all values in the
         * order the iterator supplies them.
         *
         * @param key    sorted-character signature shared by the words
         * @param values the words mapped to {@code key}
         * @param output collector receiving the single {@code (key, words)} pair
         */
        public void reduce(String key, Iterator<String> values,
                           OutputCollector<String, List<String>> output) {
            List<String> words = new ArrayList<>();
            while (values.hasNext()) {
                words.add(values.next());
            }
            output.collect(key, words);
        }
    }
}

504 · 倒排索引 (Map Reduce版本)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
* Definition of Document:
* class Document {
* public int id;
* public String content;
* }
*/
/**
 * Builds an inverted index (word -> ids of the documents containing it)
 * using a map/reduce pair.
 */
public class InvertedIndex {

    /** Map phase: emits {@code (word, documentId)} for each token of a document. */
    public static class Map {
        /**
         * Tokenizes {@code value.content} with {@code StringTokenizer}'s default
         * delimiters and emits every token together with {@code value.id}.
         *
         * @param key    identifier of the input record; not used
         * @param value  document to index; a {@code null} document or content emits nothing
         * @param output collector receiving {@code (word, documentId)} pairs
         */
        public void map(String key, Document value,
                        OutputCollector<String, Integer> output) {
            if (value == null || value.content == null) {
                return;
            }
            StringTokenizer tokens = new StringTokenizer(value.content);
            while (tokens.hasMoreTokens()) {
                output.collect(tokens.nextToken(), value.id);
            }
        }
    }

    /** Reduce phase: collapses the ids seen for one word into a duplicate-free list. */
    public static class Reduce {
        /**
         * Emits {@code (key, ids)} where {@code ids} contains each distinct document
         * id once, in the order it was first supplied by {@code values}.
         *
         * @param key    the indexed word
         * @param values document ids emitted for {@code key}; may contain repeats
         * @param output collector receiving the single {@code (key, ids)} pair
         */
        public void reduce(String key, Iterator<Integer> values,
                           OutputCollector<String, List<Integer>> output) {
            List<Integer> index = new ArrayList<>();
            // A set of already-emitted ids removes repeats wherever they occur and
            // needs no sentinel value, so no legitimate id can be dropped.
            // Fully qualified so the block needs no additional import.
            java.util.Set<Integer> seen = new java.util.HashSet<>();
            while (values.hasNext()) {
                Integer id = values.next();
                if (seen.add(id)) {
                    index.add(id);
                }
            }
            output.collect(key, index);
        }
    }
}

537 · N-Gram (Map Reduce)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
*/
/**
 * Character n-gram frequency count expressed as a map/reduce pair.
 */
public class NGram {

    /** Map phase: emits {@code (gram, 1)} for every length-{@code n} window of a string. */
    public static class Map {
        /**
         * Slides a window of {@code n} characters over {@code str} and emits each
         * substring with a count of one.
         *
         * @param s      not used by this method
         * @param n      window length in characters
         * @param str    text to slice into n-grams
         * @param output collector receiving one {@code (gram, 1)} pair per window
         */
        public void map(String s, int n, String str,
                        OutputCollector<String, Integer> output) {
            // Last index at which a full window still fits inside str.
            int lastStart = str.length() - n;
            int start = 0;
            while (start <= lastStart) {
                output.collect(str.substring(start, start + n), 1);
                start++;
            }
        }
    }

    /** Reduce phase: sums the counts gathered for a single n-gram. */
    public static class Reduce {
        /**
         * Emits {@code (key, total)} where {@code total} is the sum of {@code values}.
         *
         * @param key    the n-gram being counted
         * @param values partial counts for {@code key}
         * @param output collector receiving the single {@code (key, total)} pair
         */
        public void reduce(String key, Iterator<Integer> values,
                           OutputCollector<String, Integer> output) {
            int total = 0;
            for (; values.hasNext(); ) {
                total += values.next();
            }
            output.collect(key, total);
        }
    }
}

549 · 最常使用的k个单词 (Map Reduce)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
* Definition of Document:
* class Document {
* public int id;
* public String content;
* }
*/
/**
 * Finds the {@code k} most frequent words across documents using a map/reduce pair.
 *
 * <p>Results are ordered by descending frequency; words with equal frequency
 * are ordered by ascending {@code String.compareTo}.
 */
public class TopKFrequentWords {

    /** Map phase: emits {@code (word, 1)} for each word of a document. */
    public static class Map {
        /**
         * Tokenizes {@code value.content} on whitespace and emits every word with
         * a count of one.
         *
         * @param key    identifier of the input record; not used
         * @param value  document to tokenize; a {@code null} document or content emits nothing
         * @param output collector receiving one {@code (word, 1)} pair per word occurrence
         */
        public void map(String key, Document value,
                        OutputCollector<String, Integer> output) {
            if (value == null || value.content == null) {
                return;
            }
            for (String word : value.content.split("\\s+")) {
                // split() yields one empty token for leading whitespace or empty
                // content; it is not a word and must not be counted.
                if (!word.isEmpty()) {
                    output.collect(word, 1);
                }
            }
        }
    }

    /**
     * Reduce phase: totals each word's count and retains only the best {@code k}.
     *
     * <p>Expected call order: {@link #setup(int)}, then {@code reduce} once per
     * word, then {@code cleanup}.
     */
    public static class Reduce {
        /** A word together with its total frequency. */
        static class Pair {
            private String key;
            private Integer value;

            Pair() {}

            Pair(String key, int value) {
                this.key = key;
                this.value = value;
            }

            public String getKey() {
                return key;
            }

            public Integer getValue() {
                return value;
            }

            public void setKey(String key) {
                this.key = key;
            }

            public void setValue(Integer value) {
                this.value = value;
            }
        }

        /** Holds at most {@code k} pairs; the head is the worst-ranked retained pair. */
        private PriorityQueue<Pair> heap;
        private int k;

        /**
         * Prepares an empty heap for a new run.
         *
         * @param k number of top words to report
         */
        public void setup(int k) {
            this.k = k;
            // Ordered worst-first (lowest count, then lexicographically largest
            // key) so that polling evicts the entry that would rank last.
            this.heap = new PriorityQueue<>((a, b) -> {
                int byCount = a.getValue().compareTo(b.getValue());
                if (byCount != 0) {
                    return byCount;
                }
                return b.getKey().compareTo(a.getKey());
            });
        }

        /**
         * Sums {@code values} for {@code key} and offers the result to the heap.
         *
         * @param key    the word being counted
         * @param values partial counts for {@code key}
         */
        public void reduce(String key, Iterator<Integer> values) {
            int freq = 0;
            while (values.hasNext()) {
                freq += values.next();
            }
            if (k <= 0) {
                return;
            }
            heap.add(new Pair(key, freq));
            // Keep memory proportional to k rather than to the vocabulary size.
            if (heap.size() > k) {
                heap.poll();
            }
        }

        /**
         * Emits the retained pairs, best first, and leaves the heap empty.
         *
         * @param output collector receiving up to {@code k} {@code (word, count)} pairs
         */
        public void cleanup(OutputCollector<String, Integer> output) {
            // The heap yields worst-first, so fill the array back to front to
            // obtain best-first output order.
            Pair[] ranked = new Pair[heap.size()];
            for (int i = ranked.length - 1; i >= 0; i--) {
                ranked[i] = heap.poll();
            }
            for (Pair pair : ranked) {
                output.collect(pair.getKey(), pair.getValue());
            }
        }
    }
}

554 · 排序整数 (Map Reduce版)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
*/
/**
 * Sorts integers with a map/reduce pair: each mapper sorts its own chunk and
 * the reducer performs a k-way merge of the sorted chunks.
 */
public class SortIntegers {

    /** Map phase: sorts one chunk of numbers. */
    public static class Map {
        /**
         * Sorts {@code value} in place (ascending) and emits it under the fixed
         * key {@code "key"} so that all chunks reach the same reducer call.
         *
         * @param key    identifier of the input chunk; not used
         * @param value  numbers to sort; modified in place
         * @param output collector receiving the single {@code ("key", value)} pair
         */
        public void map(int key, List<Integer> value,
                        OutputCollector<String, List<Integer>> output) {
            Collections.sort(value);
            output.collect("key", value);
        }
    }

    /** Reduce phase: merges the sorted chunks into one sorted list. */
    public static class Reduce {
        /**
         * Merges {@code values} and emits the result under the fixed key {@code "key"}.
         *
         * <p>Each inner list is expected to be sorted ascending, as produced by
         * {@code Map}. The input lists are only read, never modified.
         *
         * @param key    key shared by the chunks; not used
         * @param values sorted chunks to merge; {@code null} or empty chunks are skipped
         * @param output collector receiving the single {@code ("key", merged)} pair
         */
        public void reduce(String key, List<List<Integer>> values,
                           OutputCollector<String, List<Integer>> output) {
            // Heap entry layout: {current value, index of its chunk, position in chunk}.
            // Integer.compare avoids the overflow that subtracting two ints can
            // cause when the operands have opposite signs and large magnitude.
            PriorityQueue<int[]> heap =
                    new PriorityQueue<>((a, b) -> Integer.compare(a[0], b[0]));

            int total = 0;
            for (int i = 0; i < values.size(); i++) {
                List<Integer> chunk = values.get(i);
                if (chunk != null && !chunk.isEmpty()) {
                    heap.add(new int[] {chunk.get(0), i, 0});
                    total += chunk.size();
                }
            }

            List<Integer> sortedNumbers = new ArrayList<>(total);
            while (!heap.isEmpty()) {
                int[] head = heap.poll();
                sortedNumbers.add(head[0]);

                // Advance a cursor instead of removing the chunk's first element:
                // this leaves the input untouched and avoids shifting the list.
                List<Integer> chunk = values.get(head[1]);
                int next = head[2] + 1;
                if (next < chunk.size()) {
                    heap.add(new int[] {chunk.get(next), head[1], next});
                }
            }

            output.collect("key", sortedNumbers);
        }
    }
}

1787 · Google Suggestion (Map Reduce)

/**
* Definition of OutputCollector:
* class OutputCollector<K, V> {
* public void collect(K key, V value);
* // Adds a key/value pair to the output buffer
* }
* Definition of Document:
* class Document {
* public int count;
* public String content;
* }
*
*class Pair {
* private String content;
* private int count;
*
* Pair(String content, int count) {
* this.content = content;
* this.count = count;
* }
* public String getContent(){
* return this.content;
* }
* public int getCount(){
* return this.count;
* }
*
*}
*/
/**
 * Query auto-completion expressed as a map/reduce pair: for every prefix,
 * report the queries with the highest counts that start with it.
 */
public class GoogleSuggestion {

    /** Map phase: emits the query under each of its non-empty prefixes. */
    public static class Map {
        /**
         * For every non-empty prefix of {@code value.content}, emits
         * {@code (prefix, Pair(content, count))}.
         *
         * @param value  query text with its count; a {@code null} document or content emits nothing
         * @param output collector receiving one pair per prefix
         */
        public void map(Document value,
                        OutputCollector<String, Pair> output) {
            if (value == null || value.content == null) {
                return;
            }
            String query = value.content;
            for (int end = 1; end <= query.length(); end++) {
                output.collect(query.substring(0, end), new Pair(query, value.count));
            }
        }
    }

    /** Reduce phase: selects the best-ranked queries for one prefix. */
    public static class Reduce {
        /** Maximum number of suggestions emitted per prefix. */
        private static final int MAX_SUGGESTIONS = 10;

        private PriorityQueue<Pair> heap;

        /**
         * Creates an empty heap ordered by descending count, with ties broken by
         * ascending content.
         */
        public void setup() {
            this.heap = new PriorityQueue<>(new Comparator<Pair>() {
                @Override
                public int compare(Pair o1, Pair o2) {
                    // Integer.compare instead of subtraction: no overflow risk.
                    int byCount = Integer.compare(o2.getCount(), o1.getCount());
                    if (byCount != 0) {
                        return byCount;
                    }
                    return o1.getContent().compareTo(o2.getContent());
                }
            });
        }

        /**
         * Emits up to {@code MAX_SUGGESTIONS} pairs for {@code key}, best first.
         *
         * @param key    the prefix
         * @param values candidate queries that start with {@code key}
         * @param output collector receiving {@code (key, pair)} for each suggestion
         */
        public void reduce(String key, Iterator<Pair> values, OutputCollector<String, Pair> output) {
            // Start from a fresh heap so candidates of one prefix never leak
            // into the result of another.
            setup();

            while (values.hasNext()) {
                heap.add(values.next());
            }

            for (int emitted = 0; emitted < MAX_SUGGESTIONS && !heap.isEmpty(); emitted++) {
                output.collect(key, heap.poll());
            }

            heap.clear();
        }
    }
}


举报

相关推荐

0 条评论