java 推荐算法

This commit is contained in:
刘亚鹏
2020-07-31 17:40:44 +08:00
parent 24a8ae701d
commit e6979c08d8
14 changed files with 103094 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
package com.tarzan.recommend;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Spring Boot entry point of the recommendation system.
 *
 * <p>The class itself carries no logic: it only hands control to
 * {@link SpringApplication}, using this class as the primary configuration source.
 */
@SpringBootApplication
public class RecommendSystemApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication.run(RecommendSystemApplication.class, args);
    }
}

View File

@@ -0,0 +1,122 @@
package com.tarzan.recommend.Service;
import com.tarzan.recommend.dto.ItemDTO;
import com.tarzan.recommend.dto.RelateDTO;
import com.tarzan.recommend.dto.UserDTO;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.assertj.core.util.Lists;
import org.springframework.util.ResourceUtils;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.List;
@Data
@Slf4j
public class FileDataSource {
public final static String folderPath="F:\\ml-100k";
/**
* 方法描述: 读取基础数据
*
* @param
* @Return {@link List< RelateDTO>}
* @throws
* @author tarzan
* @date 2020年07月31日 16:53:40
*/
public static List<RelateDTO> getData() {
List<RelateDTO> relateList = Lists.newArrayList();
try {
FileInputStream out = new FileInputStream(folderPath+"\\u.data");
InputStreamReader reader = new InputStreamReader(out, StandardCharsets.UTF_8);
BufferedReader in = new BufferedReader(reader);
String line = null;
while ((line = in.readLine()) != null) {
String newline = line.replaceAll("[\t]", " ");
String[] ht = newline.split(" ");
Integer userId = Integer.parseInt(ht[0]);
Integer movieId = Integer.parseInt(ht[1]);
Integer rating = Integer.parseInt(ht[2]);
RelateDTO dto = new RelateDTO(userId, movieId, rating);
relateList.add(dto);
}
} catch (IOException e) {
log.error(e.getMessage());
}
return relateList;
}
/**
* 方法描述: 读取用户数据
*
* @param
* @Return {@link List< UserDTO>}
* @throws
* @author tarzan
* @date 2020年07月31日 16:54:51
*/
public static List<UserDTO> getUserData() {
List<UserDTO> userList = Lists.newArrayList();
try {
FileInputStream out = new FileInputStream(folderPath+"\\u.user");
InputStreamReader reader = new InputStreamReader(out, StandardCharsets.UTF_8);
BufferedReader in = new BufferedReader(reader);
String line = null;
while ((line = in.readLine()) != null) {
String newline = line.replaceAll("[\t]", " ");
String[] ht = newline.split("\\|");
Integer id = Integer.parseInt(ht[0]);
Integer age = Integer.parseInt(ht[1]);
String sex = ht[2];
String profession = ht[3];
String postcode = ht[4];
UserDTO dto = new UserDTO(id, age, sex, profession, postcode);
userList.add(dto);
}
} catch (IOException e) {
log.error(e.getMessage());
}
return userList;
}
/**
* 方法描述: 读取电影数据
*
* @param
* @Return {@link List< ItemDTO>}
* @throws
* @author tarzan
* @date 2020年07月31日 16:54:22
*/
public static List<ItemDTO> getItemData() {
List<ItemDTO> itemList = Lists.newArrayList();
try {
FileInputStream out = new FileInputStream(folderPath+"\\u.item");
InputStreamReader reader = new InputStreamReader(out, StandardCharsets.UTF_8);
BufferedReader in = new BufferedReader(reader);
String line = null;
while ((line = in.readLine()) != null) {
String newline = line.replaceAll("[\t]", " ");
String[] ht = newline.split("\\|");
Integer id = Integer.parseInt(ht[0]);
String name = ht[1];
String date = ht[2];
String link = ht[3];
ItemDTO dto = new ItemDTO(id, name, date, link);
itemList.add(dto);
}
} catch (IOException e) {
log.error(e.getMessage());
}
return itemList;
}
}

View File

@@ -0,0 +1,44 @@
package com.tarzan.recommend.Service;
import com.tarzan.recommend.core.CoreMath;
import com.tarzan.recommend.dto.ItemDTO;
import com.tarzan.recommend.dto.RelateDTO;
import java.util.List;
import java.util.stream.Collectors;
/**
 * Recommendation service: combines the rating data, the similarity algorithm in
 * {@link CoreMath} and the item catalogue into a "guess you like" list.
 *
 * @author liu yapeng
 * @version 1.0
 * @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
 * @date 2020/7/31$ 16:18$
 * @since JDK1.8
 */
public class Recommend {

    /** User id used by the no-argument {@link #guessYouLike()} for backward compatibility. */
    private static final Integer DEFAULT_USER_ID = 2;

    /**
     * Returns the recommended items for the default user (id {@code 2}).
     *
     * @return the recommended items, in the order they appear in the item file
     * @author tarzan
     * @date 2020-07-31 17:28:06
     */
    public static List<ItemDTO> guessYouLike() {
        return guessYouLike(DEFAULT_USER_ID);
    }

    /**
     * Returns the recommended items for the given user.
     *
     * <p>Loads the rating records, asks {@link CoreMath#recommend(Integer, List)} for the
     * recommended item ids, then keeps the catalogue items whose id is in that result.
     *
     * @param userId id of the user to recommend for
     * @return the recommended items, in the order they appear in the item file
     */
    public static List<ItemDTO> guessYouLike(Integer userId) {
        CoreMath coreMath = new CoreMath();
        List<RelateDTO> data = FileDataSource.getData();
        List<Integer> recommendations = coreMath.recommend(userId, data);
        return FileDataSource.getItemData().stream()
                .filter(item -> recommendations.contains(item.getId()))
                .collect(Collectors.toList());
    }

    /**
     * Command-line demo: prints the names of the items recommended for the default user.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<ItemDTO> itemList = Recommend.guessYouLike();
        System.out.println("------猜你可能喜欢---------------下列电影="
                + itemList.stream().map(ItemDTO::getName).collect(Collectors.toList()));
    }
}

View File

@@ -0,0 +1,109 @@
package com.tarzan.recommend.core;
import com.tarzan.recommend.dto.RelateDTO;
import org.assertj.core.util.Lists;
import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
 * Core algorithm: user-based nearest-neighbour recommendation driven by the Pearson
 * correlation coefficient of the ratings two users gave to the same items.
 *
 * @author tarzan
 * @version 1.0
 * @company Luoyang Tulian Technology Co., Ltd.
 * @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
 * @date 2020/7/31$ 15:21$
 * @since JDK1.8
 */
public class CoreMath {

    /**
     * Recommends the items that the most similar other user has rated but the given user
     * has not.
     *
     * @param userId id of the user to recommend for
     * @param list   all rating records (user id, item id, rating)
     * @return the recommended item ids in ascending order; an empty list when {@code list}
     *         is {@code null} or empty, when the user has no records, or when there is no
     *         other user to compare with
     */
    public List<Integer> recommend(Integer userId, List<RelateDTO> list) {
        if (list == null || list.isEmpty()) {
            return new ArrayList<>();
        }
        Map<Integer, List<RelateDTO>> userMap =
                list.stream().collect(Collectors.groupingBy(RelateDTO::getUseId));

        List<RelateDTO> userRatings = userMap.get(userId);
        if (userRatings == null) {
            // Unknown user: there is nothing to correlate against.
            return new ArrayList<>();
        }
        Integer nearest = findNearestNeighbor(userId, userMap);
        if (nearest == null) {
            // The target user is the only user in the data.
            return new ArrayList<>();
        }

        // Items the target user has already rated.
        Set<Integer> seenItems = userRatings.stream()
                .map(RelateDTO::getModuleId)
                .collect(Collectors.toSet());

        // Items the nearest neighbour rated that the target user has not, sorted by id.
        return userMap.get(nearest).stream()
                .map(RelateDTO::getModuleId)
                .filter(item -> !seenItems.contains(item))
                .sorted()
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /**
     * Finds the other user whose ratings correlate most strongly with the given user's.
     *
     * <p>A higher Pearson coefficient means a more similar user, so the maximum is selected.
     * Ties are resolved in favour of the smaller user id to keep the result deterministic.
     *
     * @param userId  id of the target user; must be a key of {@code userMap}
     * @param userMap rating records grouped by user id
     * @return id of the most similar other user, or {@code null} when there is none
     */
    private Integer findNearestNeighbor(Integer userId, Map<Integer, List<RelateDTO>> userMap) {
        List<RelateDTO> targetRatings = userMap.get(userId);
        Integer best = null;
        double bestScore = Double.NEGATIVE_INFINITY;
        for (Map.Entry<Integer, List<RelateDTO>> entry : userMap.entrySet()) {
            Integer candidate = entry.getKey();
            // Boxed ids must be compared by value; reference comparison fails above 127.
            if (candidate.equals(userId)) {
                continue;
            }
            double score = pearsonCorrelation(entry.getValue(), targetRatings);
            boolean better = best == null
                    || score > bestScore
                    || (score == bestScore && candidate < best);
            if (better) {
                best = candidate;
                bestScore = score;
            }
        }
        return best;
    }

    /**
     * Computes the Pearson correlation of two users over the items both of them rated.
     *
     * @param xList rating records of the first user
     * @param yList rating records of the second user
     * @return the correlation of the paired ratings; {@code 0.0} when the users share no item
     */
    private double pearsonCorrelation(List<RelateDTO> xList, List<RelateDTO> yList) {
        List<Integer> xs = new ArrayList<>();
        List<Integer> ys = new ArrayList<>();
        for (RelateDTO x : xList) {
            for (RelateDTO y : yList) {
                // Value equality on the boxed item ids (== would compare references).
                if (Objects.equals(x.getModuleId(), y.getModuleId())) {
                    xs.add(x.getIndex());
                    ys.add(y.getIndex());
                }
            }
        }
        return getRelate(xs, ys);
    }

    /**
     * Computes the Pearson correlation coefficient of two equally long samples:
     * {@code r = (Σxy − Σx·Σy/n) / sqrt((Σx² − (Σx)²/n) · (Σy² − (Σy)²/n))}.
     *
     * @param xs first sample
     * @param ys second sample, paired element by element with {@code xs}
     * @return the coefficient in {@code [-1, 1]} (up to rounding); {@code 0.0} when the
     *         samples are empty or when either sample has zero variance
     * @throws IllegalArgumentException if the two samples differ in size
     * @author tarzan
     * @date 2020-07-31 17:03:20
     */
    public static Double getRelate(List<Integer> xs, List<Integer> ys) {
        int n = xs.size();
        if (ys.size() != n) {
            throw new IllegalArgumentException(
                    "Samples must have the same size: " + n + " vs " + ys.size());
        }
        if (n == 0) {
            // Without this guard the divisions by n below would yield NaN.
            return 0.0;
        }
        double sumX = xs.stream().mapToDouble(x -> x).sum();
        double sumY = ys.stream().mapToDouble(y -> y).sum();
        double sumX2 = xs.stream().mapToDouble(x -> (double) x * x).sum();
        double sumY2 = ys.stream().mapToDouble(y -> (double) y * y).sum();
        // Multiply as double so large values cannot overflow int.
        double sumXy = IntStream.range(0, n).mapToDouble(i -> (double) xs.get(i) * ys.get(i)).sum();

        double numerator = sumXy - sumX * sumY / n;
        double denominator = Math.sqrt((sumX2 - sumX * sumX / n) * (sumY2 - sumY * sumY / n));
        // Written as a negated comparison so that NaN is treated like zero.
        if (!(denominator > 0)) {
            return 0.0;
        }
        return numerator / denominator;
    }
}

View File

@@ -0,0 +1,26 @@
package com.tarzan.recommend.dto;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Business item (the thing being recommended).
 *
 * <p>Plain data holder; Lombok generates the accessors, {@code equals}/{@code hashCode},
 * {@code toString}, a no-argument constructor and an all-arguments constructor.
 * {@code FileDataSource.getItemData()} fills it from the first four {@code |}-separated
 * columns of a {@code u.item} line.
 *
 * @author liu yapeng
 * @version 1.0
 * @company Luoyang Tulian Technology Co., Ltd.
 * @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
 * @date 2020/7/31$ 15:02$
 * @since JDK1.8
 */
@Data
@NoArgsConstructor
@AllArgsConstructor
public class ItemDTO {
// NOTE: the field order defines the parameter order of the generated all-arguments
// constructor (id, name, date, link); do not reorder the fields.
// Item id (first column of the item file).
private Integer id;
// Item name (second column); printed as the movie name by Recommend.main.
private String name;
// Date string (third column), stored as read — format not validated here.
private String date;
// Link string (fourth column), stored as read.
private String link;
}

View File

@@ -0,0 +1,29 @@
package com.tarzan.recommend.dto;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Relation record linking one user to one business item with a numeric score.
 *
 * <p>Plain data holder; Lombok generates the accessors, {@code equals}/{@code hashCode},
 * {@code toString}, a no-argument constructor and an all-arguments constructor.
 * {@code FileDataSource.getData()} creates one instance per {@code u.data} line from its
 * first three columns (user id, movie id, rating).
 *
 * @author liu yapeng
 * @version 1.0
 * @company Luoyang Tulian Technology Co., Ltd.
 * @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
 * @date 2020/7/31$ 14:51$
 * @since JDK1.8
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class RelateDTO {
// NOTE: the field order defines the parameter order of the generated all-arguments
// constructor (useId, moduleId, index); do not reorder the fields.
// User id
private Integer useId;
// Business (item) id
private Integer moduleId;
// Score index; FileDataSource.getData() stores the rating column here
private Integer index;
}

View File

@@ -0,0 +1,32 @@
package com.tarzan.recommend.dto;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * User record.
 *
 * <p>Plain data holder; Lombok generates the accessors, {@code equals}/{@code hashCode},
 * {@code toString}, a no-argument constructor and an all-arguments constructor.
 * {@code FileDataSource.getUserData()} fills it from the first five {@code |}-separated
 * columns of a {@code u.user} line.
 *
 * @author liu yapeng
 * @version 1.0
 * @company Luoyang Tulian Technology Co., Ltd.
 * @copyright (c) 2019 LuoYang TuLian Co'Ltd Inc. All rights reserved.
 * @date 2020/7/31$ 14:55$
 * @since JDK1.8
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class UserDTO {
// NOTE: the field order defines the parameter order of the generated all-arguments
// constructor (id, age, sex, profession, postcode); do not reorder the fields.
// Primary key
private Integer id;
// Age
private Integer age;
// Sex, stored as the raw string read from the file
private String sex;
// Profession
private String profession;
// Postal code (kept as a string)
private String postcode;
}