记一次java中mongodb操作

2019-04-15 12:48:08  卢浮宫  版权声明:本文为站长原创文章,转载请写明出处


一、业务背景

    今天接收到的业务需求是把国航app数据抓取下来并写入到mongodb数据库中,这里简单记录下。


二、抓取数据地址

    手机app数据地址是通过fiddler 4 来进行抓取的(后面会补充一个fiddler的使用教程)。拿到抓取的url后,我们在浏览器中对它进行调试,从而确定真正返回数据的请求url

    简单说一下,这个数据请求地址可能隐藏在调用的js中,具体情况具体分析吧


三、拿到数据源

    ①通过httpclient进行数据请求,获取到数据源(一般是JSON数据或是JSON字符串)

    ②根据需要格式化数据(推荐使用下阿里巴巴的fastJSON来进行数据处理,个人感觉比较好用)


四、写入到数据库

    ①第一次操作,使用了基本的操作,后面就使用mongoTemplate了

    ②创建一个mongoDBUtil类用来进行所有的数据库操作,核心代码如下

    private static final String MONGO_HOST = "你的数据库地址";
	private static final Integer MONGO_PORT = "你的端口";
	private static final String MONGO_DB_NAME = "你的数据库名称";
	private static final String MONGO_USERNAME = "你的用户名";
	private static final String MONGO_PASSWORD = "你的密码";
	private static final String MONGO_COLLECTION_NAME = "需要操作的数据库表";
	
	/**
	 * Connects to {@code MONGO_DB_NAME}, authenticates with the configured user name and
	 * password, and returns the collection named by {@code MONGO_COLLECTION_NAME}.
	 *
	 * <p>Every call creates a new {@link MongoClient}. When the method does not hand a
	 * collection back (authentication rejected or an exception thrown), the client is
	 * closed here because the caller has no reference through which to release it.
	 *
	 * @return the collection, or {@code null} when authentication fails
	 * @throws UnknownHostException declared for the {@code MongoClient} constructor
	 */
	public static DBCollection mongoDbConnect() throws UnknownHostException {
		MongoClient mongoClient = new MongoClient(MONGO_HOST, MONGO_PORT);
		boolean connected = false;
		try {
			DB db = mongoClient.getDB(MONGO_DB_NAME);
			// Verify user name and password.
			@SuppressWarnings("deprecation")
			boolean auth = db.authenticate(MONGO_USERNAME,
					MONGO_PASSWORD.toCharArray());
			if (!auth) {
				System.out.println(MONGO_DB_NAME + "连接失败!");
				return null;
			}
			System.out.println(MONGO_DB_NAME + "连接成功!");
			DBCollection collection = db.getCollection(MONGO_COLLECTION_NAME);
			connected = true;
			return collection;
		} finally {
			// Release the client on every path that does not return a usable collection.
			if (!connected) {
				mongoClient.close();
			}
		}
	}
	
	//其他表的连接设置  test
	/**
	 * Connects to {@code MONGO_DB_NAME}, authenticates with the configured user name and
	 * password, and returns the {@code xaIgnioreFlightno} collection.
	 *
	 * <p>Every call creates a new {@link MongoClient}. When the method does not hand a
	 * collection back (authentication rejected or an exception thrown), the client is
	 * closed here because the caller has no reference through which to release it.
	 *
	 * @return the collection, or {@code null} when authentication fails
	 * @throws UnknownHostException declared for the {@code MongoClient} constructor
	 */
	public static DBCollection mongoDbConnectForXa() throws UnknownHostException {
		MongoClient mongoClient = new MongoClient(MONGO_HOST, MONGO_PORT);
		boolean connected = false;
		try {
			DB db = mongoClient.getDB(MONGO_DB_NAME);
			// Verify user name and password.
			@SuppressWarnings("deprecation")
			boolean auth = db.authenticate(MONGO_USERNAME,
					MONGO_PASSWORD.toCharArray());
			if (!auth) {
				System.out.println(MONGO_DB_NAME + "连接失败!");
				return null;
			}
			System.out.println(MONGO_DB_NAME + "连接成功!");
			DBCollection collection = db.getCollection("xaIgnioreFlightno");
			connected = true;
			return collection;
		} finally {
			// Release the client on every path that does not return a usable collection.
			if (!connected) {
				mongoClient.close();
			}
		}
	}

	/**
	 * Creates a new {@link MongoClient} for the configured host and port and closes it
	 * straight away.
	 *
	 * <p>NOTE(review): the client closed here is the one created inside this method, not a
	 * client created earlier by {@code mongoDbConnect()}; those are local to their own
	 * methods and are not reachable from here.
	 *
	 * @throws UnknownHostException declared for the {@code MongoClient} constructor
	 */
	public static void mongoDbDisConnect() throws UnknownHostException{
		final MongoClient client = new MongoClient(MONGO_HOST, MONGO_PORT);
		// Disconnect and release the resources held by this client.
		client.close();
	}
	
	/**
	 * Inserts a batch of documents into the given collection.
	 *
	 * <p>A {@code null} or empty batch is a no-op: there is nothing to write, and callers
	 * (for example a {@code finally} block that flushes whatever was parsed) may
	 * legitimately arrive here with no documents.
	 *
	 * @param collection target collection
	 * @param dbList     documents to insert; may be {@code null} or empty
	 */
	public static void doInsert(DBCollection collection,List<DBObject> dbList){
		if (dbList == null || dbList.isEmpty()) {
			return;
		}
		// Bulk write of all documents in one call.
		collection.insert(dbList);
	}
	
	/**
	 * Issues a remove on the collection using the given document as the query.
	 *
	 * @param collection collection to remove from
	 * @param document   query document passed to {@code remove}
	 */
	public static void doRemove(DBCollection collection, BasicDBObject document){
		final BasicDBObject criteria = document;
		collection.remove(criteria);
	}
	
	/**
	 * Removes the record whose {@code _id} equals the given id.
	 *
	 * @param collection collection to remove from
	 * @param airShiftId value matched against the {@code _id} field
	 */
	public static void doRemoveById(DBCollection collection, String airShiftId){
		// Build the {_id: airShiftId} query in one step and hand it to remove.
		collection.remove(new BasicDBObject("_id", airShiftId));
	}
	
	/**
	 * Removes every record whose {@code _id} is contained in {@code idList}.
	 *
	 * <p>All ids are sent in a single {@code {_id: {$in: [...]}}} remove instead of one
	 * remove per id, so the whole batch costs one call to the server.
	 * NOTE(review): the ids travel in one query document, so an extremely large list
	 * should be split into chunks by the caller.
	 *
	 * @param collection collection to remove from
	 * @param idList     ids to delete; {@code null} or empty means nothing is removed
	 */
	public static void doRemoveMany(DBCollection collection, List<String> idList){
		if (idList == null || idList.isEmpty()) {
			return;
		}
		BasicDBObject query = new BasicDBObject("_id", new BasicDBObject("$in", idList));
		collection.remove(query);
	}
	
	/**
	 * Loads the records whose {@code buildDate} equals {@code DateUtil.getNowDate()},
	 * sorted by {@code flightno} in descending order.
	 *
	 * <p>The result is never {@code null}: it is empty when no record matches, when
	 * {@code mongoDbConnect()} returns {@code null} (authentication failed), or when the
	 * connection attempt throws {@link UnknownHostException}.
	 *
	 * @return the matching records, possibly empty
	 */
	public static List<Object> getNeedUploadData(){
		List<Object> rltList = new ArrayList<Object>();
		try {
			DBCollection collection = mongoDbConnect();
			if (collection == null) {
				// mongoDbConnect() already printed the failure; avoid a NullPointerException here.
				return rltList;
			}
			BasicDBObject searchObj = new BasicDBObject("buildDate", DateUtil.getNowDate());
			DBCursor cursor = collection.find(searchObj).sort(new BasicDBObject("flightno",-1));
			try {
				while (cursor.hasNext()) {
					rltList.add(cursor.next());
				}
			} finally {
				// Always release the cursor, even if iteration fails part-way.
				cursor.close();
			}
			if (rltList.isEmpty()) {
				System.out.println("当前暂无需同步数据!");
			}
		} catch (UnknownHostException e) {
			e.printStackTrace();
		}
		return rltList;
	}

五、业务操作相关代码如下(以一个数据写入为例):        

        try {
			for (int i = 0; i < ffList.size(); i++) {
				JSONObject jsonObj = JSON.parseObject(ffList.get(i).toString());
				String flightListStr = jsonObj.getString("flightList");
				JSONArray flightListArray = JSON.parseArray(flightListStr);
				JSONObject flightListObj = JSON.parseObject(flightListArray.get(0).toString());
				airId = jsonObj.getString("id");
				airClass = Mycrawler.class.toString();
				airAir = jsonObj.getString("pnr");
				airDpt = jsonObj.getString("orgdstDes").split("-")[0];
				airArr = jsonObj.getString("orgdstDes").split("-")[1];
				airDepartDate = jsonObj.getString("calendarDate");
				airBegindate = airDepartDate + " "+ flightListObj.getString("departureTime");
				airEnddate = airDepartDate + " "+ flightListObj.getString("arrivalTime");
				airFlightno = flightListObj.getString("flightNo");
				airCabin = flightListObj.getString("seatClass");
				airSeatSale = flightListObj.getString("seatNum");
				airTicketprice = jsonObj.getString("ticketPrice");
				airTimeLimit = jsonObj.getString("endTime");				
				// 格式化DBObject数据
				//这里的逻辑只负责元数据写入及删除,不做其他处理(保证数据抓取速度)
//				Map<String, String> checkRlt = dataCheck(collection, airAir,airDepartDate, airBegindate, airFlightno, airCabin,airTicketprice);
//				String checkState = checkRlt.get("checkState");
//				switch (checkState) {
//				case "new":				
//					idList.add(airId);
//					break;
//				case "old":
//				case "newButSame":
//					continue;
//				}
				DBObject document = getDbObjectTmp(collection, airId, airClass,airAir, airDpt, airArr, airDepartDate, airBegindate,airEnddate, airFlightno, airCabin, airSeatSale,airTicketprice, airCreatedate, airTimeLimit);
				dbList.add(document);				
				System.out.println(Thread.currentThread().getName() + " productId_ " + productId + " data " + document);
			}
		} catch (Exception e) {
			ExceptionUtil.logException("解析航班数据异常:", e);
		} finally {
			MongoDBUtil.doInsert(collection, dbList);
		}



更多精彩请关注guangmuhua.com


最新评论: