• 热门专题

利用艺术家的整数ID映射将标签转换为向量

作者:  发布日期:2016-03-07 20:38:19
Tag标签:向量  整数  艺术家  
    /**
     * @author YangXin
     * @info Mapper选择艺术家的整数特征ID然后建立单个特征的向量。这些一维的部分
     * 向量会传给Reducer,后者会将这些向量简单地进行联结,生成一个完整的向量。
     */
    package unitTwelve;
    
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.regex.Pattern;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.DefaultStringifier;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.util.GenericsUtil;
    import org.apache.mahout.math.NamedVector;
    import org.apache.mahout.math.SequentialAccessSparseVector;
    import org.apache.mahout.math.VectorWritable;
    
    /**
     * Mapper that converts one input record into a single-entry sparse vector.
     *
     * <p>Each input line is split on the literal separator {@code <sep>}; field 1 is
     * read as the artist, field 2 as the tag and field 3 as the weight. The artist is
     * translated to an integer index through the dictionary loaded in
     * {@link #setup(Context)}, and a {@link NamedVector} named after the tag, holding
     * the weight at that index, is emitted under the tag as key.
     *
     * <p>Records that cannot be converted (too few fields, non-numeric weight, artist
     * absent from the dictionary) are skipped and counted under the
     * {@code Map / LinesWithErrors} counter.
     */
    public class VectorMapper extends Mapper<LongWritable, Text, Text, VectorWritable>{
    	private Pattern splitter;
    	private VectorWritable writer;
    	private Map<String, Integer> dictionary = new HashMap<String, Integer>();
    	
    	/**
    	 * Emits a one-dimensional partial vector for the given input line.
    	 *
    	 * @param key     the input key supplied by the framework (not used here)
    	 * @param value   one input line, with fields separated by {@code <sep>}
    	 * @param context the task context used for counters and output
    	 * @throws IOException          if writing the output record fails
    	 * @throws InterruptedException if the task is interrupted while writing
    	 */
    	@Override
    	protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
    		String[] fields = splitter.split(value.toString());
    		if(fields.length < 4){
    			context.getCounter("Map", "LinesWithErrors").increment(1);
    			return;
    		}
    		String artist = fields[1];
    		String tag = fields[2];
    		
    		double weight;
    		try{
    			weight = Double.parseDouble(fields[3]);
    		}catch(NumberFormatException e){
    			// A malformed weight is a bad line, not a reason to fail the whole task.
    			context.getCounter("Map", "LinesWithErrors").increment(1);
    			return;
    		}
    		
    		// The dictionary is keyed by artist name; looking up the raw Text line (as the
    		// code previously did) can never match and ends in a NullPointerException.
    		Integer artistIndex = dictionary.get(artist);
    		if(artistIndex == null){
    			// Artist has no assigned feature ID, so there is no index to write to.
    			context.getCounter("Map", "LinesWithErrors").increment(1);
    			return;
    		}
    		
    		NamedVector vector = new NamedVector(new SequentialAccessSparseVector(dictionary.size()), tag);
    		vector.set(artistIndex.intValue(), weight);
    		writer.set(vector);
    		context.write(new Text(tag), writer);
    	}
    	
    	/**
    	 * Loads the artist dictionary from the job configuration entry
    	 * {@code "dictionary"} and prepares the field splitter and the output writer.
    	 *
    	 * @param context the task context giving access to the job configuration
    	 * @throws IOException          if the dictionary entry is missing or cannot be decoded
    	 * @throws InterruptedException if the superclass setup is interrupted
    	 */
    	@Override
    	protected void setup(Context context) throws IOException, InterruptedException{
    		super.setup(context);
    		Configuration conf = context.getConfiguration();
    		String serializedDictionary = conf.get("dictionary");
    		if(serializedDictionary == null){
    			throw new IOException("Job configuration has no \"dictionary\" entry");
    		}
    		// The initial (empty) HashMap instance is only used to obtain the class token
    		// the stringifier needs for deserialization.
    		DefaultStringifier<Map<String, Integer>> mapStringifier = new DefaultStringifier<Map<String, Integer>>(conf, GenericsUtil.getClass(dictionary));
    		dictionary = mapStringifier.fromString(serializedDictionary);
    		splitter = Pattern.compile("<sep>");
    		writer = new VectorWritable();
    	}
    }
    
About IT165 - 广告服务 - 隐私声明 - 版权申明 - 免责条款 - 网站地图 - 网友投稿 - 联系方式
本站内容来自于互联网,仅供用于网络技术学习,学习中请遵循相关法律法规