`

海量数据之 统计ip频率top

 
阅读更多

 思路与步骤

先做hashMap(ip,count)统计出现的次数。

遍历hashMap,根据次数为权值插入二叉树。

中序遍历树。

注:

前序(根左右),

中序(左根右),

后序(左右根)

 

节点定义

 

 

package tree;

public class Node {
	/**
	 * 
	 */
	private static final long serialVersionUID = 1L;
	Node parent;
	Node left;
	Node right;
	long value;
	Object content;

	public Node getLeft() {
		return left;
	}

	public void setLeft(Node left) {
		this.left = left;
	}

	public Node getRight() {
		return right;
	}

	public Node getParent() {
		return parent;
	}

	public void setParent(Node parent) {
		this.parent = parent;
	}

	public void setRight(Node right) {
		this.right = right;
	}

	public long getValue() {
		return value;
	}

	public void setValue(long value) {
		this.value = value;
	}

	public void setContent(Object content) {
		this.content = content;
	}

	public Object getContent() {
		return content;
	}

}

 

 

树的定义

 

 

package tree;

/**
 * 
 * @author xiaofancn@gmail.com
 * 
 * @deprecated 二叉树是左边为权值最大。
 * 
 */

public class BinaryTree {

	CallBack callBack = null;

	/**
	 * 
	 */
	private static final long serialVersionUID = 1L;

	private Node root;

	long size = 0;

	public void addNode(Node node) {

		if (node == null)
			return;

		if (root == null) {
			root = node;
			size++;
			return;
		}

		Node point = root;

		do {
			if (point.value < node.value) {
				if (point.left == null) {
					node.setParent(point);
					point.left = node;
					size++;
					break;
				}
				point = point.left;
			} else {
				if (point.right == null) {
					node.setParent(point);
					point.right = node;
					size++;
					break;
				}
				point = point.right;
			}
		} while (point != null);

	}

	public Node getLeftLast() {
		if (root == null) {
			return null;
		}

		Node point = root;

		while (point.left != null) {
			point = point.left;
		}
		return point;
	}

	public long size() {
		return size;
	}

	public Node getRoot() {
		return root;
	}

	public Node getRightLast() {
		Node point = root;
		while (point.right != null) {
			point = point.right;
		}
		return point;
	}

	public void setCallBack(CallBack callBack) {
		this.callBack = callBack;
	}

	/**
	 * 
	 * 中序遍历
	 * 
	 * @param node
	 */

	public void inItertor(Node node) {
		if (node.getLeft() != null) {
			inItertor(node.getLeft());
		}
		callBack.doThing(node);
		if (node.getRight() != null) {
			inItertor(node.getRight());
		}

	}

	public interface CallBack {
		void doThing(Node node);
	}

}

 

 

测试代码

 

 

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import tree.BinaryTree;
import tree.BinaryTree.CallBack;
import tree.Node;

public class ClientMain {
	static int index = 0;
	static int top = 15;

	public static void main(String[] args) {

		BinaryTree binaryTree = new BinaryTree();
		Map<String, CountIP> map = new HashMap<String, CountIP>();

		for (int i = 0; i < 10000 * 10000; i++) {
			String ip = "192.168." + (int) (Math.random() * 254 + 1) + "."
					+ (int) (Math.random() * 254 + 1);
			if (map.get(ip) == null) {
				CountIP cip = new CountIP();
				cip.setCount(1);
				cip.setIp(ip);
				map.put(ip, cip);
			} else {
				CountIP cip = map.get(ip);
				cip.setCount(cip.getCount() + 1);
				map.put(ip, cip);
			}

		}

		Set<String> set = map.keySet();
		for (String ipkey : set) {
			Node node = new Node();
			node.setValue(map.get(ipkey).getCount());
			node.setContent(map.get(ipkey).getIp());
			binaryTree.addNode(node);
		}

		System.out.println(binaryTree.size());
		Node point = binaryTree.getRoot();

		binaryTree.setCallBack(new CallBack() {
			@Override
			public void doThing(Node node) {
				if (index < top) {
					index++;
					System.out.println(node.getContent() + "===="
							+ node.getValue());
				}
			}
		});

		binaryTree.inItertor(point);
	}

	private static class CountIP {
		String ip;
		long count;

		public long getCount() {
			return count;
		}

		public void setCount(long count) {
			this.count = count;
		}

		public String getIp() {
			return ip;
		}

		public void setIp(String ip) {
			this.ip = ip;
		}
	}

}
分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics