[RDF data]
Home | Example Publications
PropertyValue
dcterms:bibliographicCitation <http://dblp.uni-trier.de/rec/bibtex/journals/corr/abs-2312-09244>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Adam_Fisch>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Ahmad_Beirami>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Alekh_Agarwal>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Alex_D%27Amour>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Chirag_Nagpal>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Deepak_Ramachandran>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Dj_Dvijotham>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Jacob_Eisenstein>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Jonathan_Berant>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Katherine_A._Heller>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Peter_Shaw>
dc:creator <https://dblp.l3s.de/d2r/resource/authors/Stephen_Pfohl>
foaf:homepage <http://dx.doi.org/doi.org%2F10.48550%2FarXiv.2312.09244>
foaf:homepage <https://doi.org/10.48550/arXiv.2312.09244>
dc:identifier DBLP journals/corr/abs-2312-09244 (xsd:string)
dc:identifier DOI doi.org%2F10.48550%2FarXiv.2312.09244 (xsd:string)
dcterms:issued 2023 (xsd:gYear)
swrc:journal <https://dblp.l3s.de/d2r/resource/journals/corr>
rdfs:label Helping or Herding? Reward Model Ensembles Mitigate but do not Eliminate Reward Hacking. (xsd:string)
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Adam_Fisch>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Ahmad_Beirami>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Alekh_Agarwal>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Alex_D%27Amour>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Chirag_Nagpal>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Deepak_Ramachandran>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Dj_Dvijotham>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Jacob_Eisenstein>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Jonathan_Berant>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Katherine_A._Heller>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Peter_Shaw>
foaf:maker <https://dblp.l3s.de/d2r/resource/authors/Stephen_Pfohl>
owl:sameAs <http://bibsonomy.org/uri/bibtexkey/journals/corr/abs-2312-09244/dblp>
owl:sameAs <http://dblp.rkbexplorer.com/id/journals/corr/abs-2312-09244>
rdfs:seeAlso <http://dblp.uni-trier.de/db/journals/corr/corr2312.html#abs-2312-09244>
rdfs:seeAlso <https://doi.org/10.48550/arXiv.2312.09244>
dc:title Helping or Herding? Reward Model Ensembles Mitigate but do not Eliminate Reward Hacking. (xsd:string)
dc:type <http://purl.org/dc/dcmitype/Text>
rdf:type swrc:Article
rdf:type foaf:Document
swrc:volume abs/2312.09244 (xsd:string)